In [2]:
import pandas as pd
import plotly.graph_objs as go
import json as js

def cleandata(dataset, keepcolumns=('Country Name', '1990', '2015'), value_variables=('1990', '2015'), countries=None):
    """Clean World Bank data for a visualization dashboard.

    Reads the csv file, keeps only the columns listed in ``keepcolumns``,
    keeps only the rows whose 'Country Name' is in ``countries`` and
    reorients the year columns into long format: one row per country and
    year. Nothing is written to disk; the cleaned frame is returned.

    Args:
        dataset (str or file-like): path of the csv data file, or any other
            input accepted by ``pandas.read_csv``.
        keepcolumns (sequence of str): columns to retain. Must contain
            'Country Name' and every entry of ``value_variables``.
        value_variables (sequence of str): year columns to unpivot.
        countries (sequence of str, optional): country names to keep.
            Defaults to the top 10 economies listed below.

    Returns:
        pandas.DataFrame: columns 'country', 'year' (integer year) and
        'variable' (the measured value for that country and year).

    """
    if countries is None:
        countries = ['United States', 'China', 'Japan', 'Germany', 'United Kingdom',
                     'India', 'France', 'Brazil', 'Italy', 'Canada']

    # The first four lines are skipped before the header row
    # (presumably the World Bank metadata preamble -- confirm against the file).
    df = pd.read_csv(dataset, skiprows=4)

    # Keep only the columns of interest (years and country name).
    # list() lets callers pass any sequence; a bare tuple would be treated
    # as a single column key by DataFrame.__getitem__.
    df = df[list(keepcolumns)]
    df = df[df['Country Name'].isin(countries)]

    # Melt year columns and convert the year labels to integer years.
    df_melt = df.melt(id_vars='Country Name', value_vars=list(value_variables))
    # NOTE: the value column is deliberately still called 'variable' so that
    # existing callers relying on that column name keep working.
    df_melt.columns = ['country', 'year', 'variable']
    df_melt['year'] = df_melt['year'].astype('datetime64[ns]').dt.year

    return df_melt


In [73]:
import pandas as pd
import json as js
import plotly.express as px

def graph2(dataset='API_AG.LND.ARBL.HA.PC_DS2_en_csv_v2.csv', countries=None):
    """Load arable-land-per-person data in long format, largest values first.

    Reads the csv file, keeps the 'Country Name', '1990' and '2015' columns
    for the selected countries, melts the two year columns into rows and
    sorts the result by value in descending order.

    Args:
        dataset (str or file-like): path of the csv data file, or any other
            input accepted by ``pandas.read_csv``. Defaults to the arable
            land file used by this notebook.
        countries (sequence of str, optional): country names to keep.
            Defaults to the 11 countries listed below (the top 10 economies
            plus Pakistan).

    Returns:
        pandas.DataFrame: columns 'country', 'year' (integer year) and
        'hectaresarablelandperperson', sorted by the latter in descending
        order. The index is the pre-sort row position and is not reset.

    """
    if countries is None:
        countries = ['United States', 'China', 'Japan', 'Germany', 'United Kingdom',
                     'India', 'France', 'Brazil', 'Italy', 'Canada', 'Pakistan']

    # The first four lines are skipped before the header row
    # (presumably the World Bank metadata preamble -- confirm against the file).
    df = pd.read_csv(dataset, skiprows=4)

    # Filter for 1990 and 2015 and for the selected countries.
    df = df[['Country Name', '1990', '2015']]
    df = df[df['Country Name'].isin(countries)]

    # Melt year columns and convert the year labels to integer years.
    df_melt = df.melt(id_vars='Country Name', value_vars=['1990', '2015'])
    df_melt.columns = ['country', 'year', 'hectaresarablelandperperson']
    df_melt['year'] = df_melt['year'].astype('datetime64[ns]').dt.year

    # Largest values first; return a new frame instead of sorting in place.
    return df_melt.sort_values('hectaresarablelandperperson', ascending=False)


In [75]:
# Bubble chart of the frame returned by graph2(): one marker per country and
# year, with both the vertical position and the marker size driven by the
# hectares-per-person value.
fig = px.scatter(
    graph2(),
    x="year",
    y="hectaresarablelandperperson",
    color="country",
    size="hectaresarablelandperperson",
    hover_data=['country'],
    # Title and readable labels so the figure stands on its own instead of
    # showing the raw column names on the axes and legend.
    title="Arable land per person, 1990 vs 2015",
    labels={
        "hectaresarablelandperperson": "Arable land (hectares per person)",
        "year": "Year",
        "country": "Country",
    },
)
fig.show()

In [74]:
# Preview the frame returned by graph2() (at most its first 50 rows); the rows
# arrive already sorted by 'hectaresarablelandperperson' in descending order.
graph2().head(50)

Unnamed: 0,country,year,hectaresarablelandperperson
1,Canada,1990,1.637365
12,Canada,2015,1.216951
10,United States,1990,0.743826
21,United States,2015,0.474491
11,Brazil,2015,0.388504
0,Brazil,1990,0.339339
4,France,1990,0.303947
15,France,2015,0.277358
9,Pakistan,1990,0.276285
6,India,1990,0.187855
