### Importing data: countries' position and import/export flows 
We start by downloading the data from the [Eurostat transport database](https://ec.europa.eu/eurostat/web/transport/data/database). We are also interested in the position of each country (its latitude and longitude), so that we can use this information to plot the countries of interest on a world map. 
Then, we proceed with loading the dataset containing the information on import and export activities and flows ('complete_dataset'). 
Finally, we filter the position dataset so that it keeps only the countries contained in the import/export dataset. 

In [None]:
from io import BytesIO

import requests
import pandas as pd

# Download the table with every country and its position (latitude and
# longitude). The page is HTML, so the table is extracted with read_html.
url = 'https://developers.google.com/public-data/docs/canonical/countries_csv'
response = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of trying to parse an error page.
response.raise_for_status()
html = response.content
# Hand read_html a file-like object: recent pandas versions deprecate passing
# literal HTML content directly.
df_list = pd.read_html(BytesIO(html))
# The country table is taken to be the last table found on the page.
df_position = df_list[-1]

# Import the complete import/export dataset.
complete_dataset = pd.read_csv('official_dataset.csv')

# Build the boolean list "l": one entry per row of the position dataset
# (df_position), True when the country name also appears as a REPORTER in the
# official dataset (presumably only EU countries -- confirm against the data).
l = df_position['name'].isin(complete_dataset['REPORTER']).tolist()
print(l)

# Keep only the matching rows of the position dataframe.
df_position = df_position[l]
# Export the filtered dataset.
df_position.to_csv('position_countries.csv')


### Clean and merge datasets
 

In [None]:
import pandas as pd
import add_lat_lon_3
import os

def merge_database(complete_df, already_merged=None):
    '''
    Read, clean and concatenate every not-yet-merged dataset of the project.

    Each entry of ``<cwd>/dataset_project_DSA`` whose name starts with
    ``'data'`` and is not listed in ``already_merged`` is expected to contain
    a ``DS-1262527_1_Data.csv`` file. That file is read, passed through
    ``add_lat_lon_3.clean_df`` and ``add_lat_lon_3.add_lat_lon`` and the
    resulting frames are concatenated.

    Parameters
    ----------
    complete_df: currently unused by this function; kept so that existing
        callers keep working.
    already_merged: list of entry names that must be skipped. If it is not
        passed (or is None) a new empty list is used for this call. A list
        passed by the caller is extended in place with the names merged now.

    Returns
    -------
    (datas, already_merged): the concatenated pandas DataFrame and the list
        of names merged so far.

    Raises
    ------
    ValueError: if there is no new dataset to merge.
    '''
    # A fresh list per call: a mutable default would be shared across calls
    # and silently remember the directories merged by earlier calls.
    if already_merged is None:
        already_merged = []

    base_dir = os.path.join(os.getcwd(), 'dataset_project_DSA')
    pending = [
        name for name in os.listdir(base_dir)
        if name.startswith('data') and name not in already_merged
    ]
    if not pending:
        # pd.concat would raise a less helpful ValueError on an empty list.
        raise ValueError('No new dataset to merge in ' + base_dir)

    # The position table is the same for every dataset: read it only once.
    # NOTE(review): assumes add_lat_lon does not modify pos_countries -- confirm.
    pos_countries = pd.read_csv(os.path.join(base_dir, 'position_countries.csv'))

    frames = []
    for name in pending:
        data = pd.read_csv(os.path.join(base_dir, name, 'DS-1262527_1_Data.csv'))
        data = add_lat_lon_3.clean_df(data)
        data = add_lat_lon_3.add_lat_lon(data, pos_countries)
        frames.append(data)
        already_merged.append(name)

    datas = pd.concat(frames)
    return datas, already_merged

if __name__ == '__main__':
    # Script entry point: merge every available dataset (nothing is marked as
    # already merged) and write the result to the current working directory.
    merged_df, _merged_names = merge_database([])
    merged_df.to_csv('./official_dataset.csv')

## Adding latitude and longitude information
We proceed with cleaning some countries' names and creating a function that appends the latitude and longitude of each country contained in the main dataset, in order to have one single csv that can be used as a proper base for analysis and plotting. 

In [None]:
import pandas as pd

def clean_df(df):
    '''
    Return a copy of the dataset with cleaned country names in 'REPORTER'
    (e.g. from "Belgium (incl. Luxembourg 'LU' -> 1998)" to 'Belgium').

    Rows whose REPORTER is 'Czechia' are removed (presumably because the
    position table has no entry under that name -- confirm).
    The DataFrame passed in is not modified.

    Parameters
    ----------
    df: pandas DataFrame with a 'REPORTER' column that needs to be cleaned

    Returns
    -------
    A new pandas DataFrame with the cleaned 'REPORTER' column.
    '''
    # Exact dataset labels -> plain country names.
    renames = {
        "Belgium (incl. Luxembourg 'LU' -> 1998)": 'Belgium',
        "Germany (incl. German Democratic Republic 'DD' from 1991)": 'Germany',
        'Ireland (Eire)': 'Ireland',
        "Spain (incl. Canary Islands 'XB' from 1997)": 'Spain',
        "Italy (incl. San Marino 'SM' -> 1993)": 'Italy',
    }

    # A boolean mask (rather than dropping by index label) removes exactly the
    # Czechia rows even when the index contains duplicated labels.
    df = df.loc[df['REPORTER'] != 'Czechia'].copy()
    df['REPORTER'] = df['REPORTER'].replace(renames)

    # The France label contains accented characters ("Barthelemy", "Reunion")
    # that can be decoded differently depending on the file encoding, so it is
    # matched by its prefix instead of by the full, encoding-sensitive string.
    is_france = df['REPORTER'].astype(str).str.startswith('France (incl.')
    df.loc[is_france, 'REPORTER'] = 'France'
    return df

def add_lat_lon(df, pos_countries):
    '''
    Add the latitude and longitude of each REPORTER country to the main
    dataset, so that it is ready to be passed to plotly.

    The columns 'Latitude' and 'Longitude' are written into ``df`` itself,
    which is also returned.

    Parameters
    ----------
    df: main pandas DataFrame, with a 'REPORTER' column
    pos_countries: pandas DataFrame with the columns 'name', 'latitude' and
        'longitude'

    Returns
    -------
    The same DataFrame ``df`` with the two new columns.

    Raises
    ------
    KeyError: if a REPORTER value has no entry in ``pos_countries``; ``df`` is
        left untouched in that case.
    '''
    d_lat = dict(zip(pos_countries['name'], pos_countries['latitude']))
    d_long = dict(zip(pos_countries['name'], pos_countries['longitude']))

    # Check before writing anything, so a missing country is reported by name
    # instead of silently producing NaN coordinates.
    unknown = [name for name in df['REPORTER'].unique() if name not in d_lat]
    if unknown:
        raise KeyError(
            'no position found for REPORTER value(s): '
            + ', '.join(sorted(map(str, unknown)))
        )

    # Vectorised lookup instead of iterating over the rows one by one.
    df['Latitude'] = df['REPORTER'].map(d_lat)
    df['Longitude'] = df['REPORTER'].map(d_long)
    return df


if __name__ == '__main__':
    # Manual check on one dataset: clean the country names, attach the
    # coordinates and print the resulting table.
    raw_df = pd.read_csv('dataset_project_DSA/dataset_india_export/DS-1262527_1_Data.csv')
    positions = pd.read_csv('position_countries.csv')

    cleaned_df = clean_df(raw_df)
    print(add_lat_lon(cleaned_df, positions))

### Mapping Imports and Exports
As the final step, we plot the import and export data.
In our final representation, the values of imports and exports are shown as bubbles that can be clicked interactively. It is also possible to select single countries to further analyse their relationship with the European Union. 
A slider has been added to make it easier to investigate how the flows changed over time. 

In [None]:
import pandas as pd
import plotly.express as px


# Load the merged dataset from the parent directory. A forward slash is used
# because '\o' is an invalid escape sequence in a normal string literal and a
# backslash path separator only works on Windows.
official_df = pd.read_csv('../official_dataset.csv')
# Distinct coordinates found in the dataset (not used by the plots below).
lat = official_df['Latitude'].unique()
lon = official_df['Longitude'].unique()


def _flow_subset(df, flow):
    '''
    Return the rows of ``df`` whose FLOW equals ``flow``, with 'Value' as float.

    Spaces are removed from the values and a value that is exactly ':' is
    replaced by '0' before the conversion. The result is an independent copy,
    so assigning to it does not trigger SettingWithCopyWarning and never
    touches ``df``.
    '''
    subset = df.loc[df['FLOW'] == flow].copy()
    subset['Value'] = subset['Value'].str.replace(' ', '').replace(':', '0')
    subset['Value'] = subset['Value'].astype(float)
    return subset


def _flow_map(df, title):
    '''
    Build the animated bubble map for one flow.

    One bubble per row, placed at Latitude/Longitude, sized by 'Value',
    coloured by PARTNER, with one animation frame per PERIOD.
    '''
    flow_fig = px.scatter_geo(df,
                              lat='Latitude',
                              lon='Longitude',
                              animation_frame='PERIOD',
                              size_max=55,
                              hover_name='REPORTER',
                              size='Value',
                              color='PARTNER',
                              title=title)
    flow_fig.update_geos(fitbounds="locations", showcountries=True)
    return flow_fig


# Dynamic map which shows the evolution of imports over time:
# the bubbles represent the size of imports of each EU country from BRICS and
# USA; it is possible to filter by partner and select the period to visualize.
official_df_imp = _flow_subset(official_df, 'IMPORT')
fig = _flow_map(official_df_imp, "IMPORTS of EU countries from BRICS and USA")
# plot map
fig.show()


# Dynamic map which shows the evolution of exports over time:
# the bubbles represent the size of exports from each EU country to BRICS and
# USA; it is possible to filter by partner and select the period to visualize.
official_df_exp = _flow_subset(official_df, 'EXPORT')
fig = _flow_map(official_df_exp, "EXPORTS from EU countries to BRICS and USA")
# plot map
fig.show()



### Conclusion 
The data observed are quite interesting, especially when looking at the change over time. It is possible to notice a significant spike in flows when going from 2020 to 2021.