In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file found.
for folder, _subdirs, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(folder, file_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Import the data

### FAOSTAT_data

In [None]:
# Read the FAOSTAT CSV export from the Kaggle input directory into a DataFrame.
faostat_csv_path = "/kaggle/input/temperature-change/FAOSTAT_data_11-24-2020.csv"
faostat_df = pd.read_csv(faostat_csv_path)

In [None]:
# Preview the first 10 rows of the FAOSTAT frame.
faostat_df.head(10)

### Environment Temperature change

In [None]:
# Read the environment temperature-change CSV; the file is decoded as latin1.
envtem_csv_path = "/kaggle/input/temperature-change/Environment_Temperature_change_E_All_Data_NOFLAG.csv"
envtem_df = pd.read_csv(envtem_csv_path, encoding='latin1')

In [None]:
# Preview the first 5 rows of the temperature-change frame.
envtem_df.head(5)

## Feature selection

In [None]:
# Print a summary of the frame (columns, non-null counts, dtypes) before any filtering.
envtem_df.info()

In [None]:
# Show the dimensions of the dataframe as a (rows, columns) tuple.
envtem_df.shape

In [None]:
# Keep only the rows whose 'Element' value is 'Temperature change'.
is_temperature_change = envtem_df['Element'].eq('Temperature change')
envtem_df = envtem_df[is_temperature_change]

In [None]:
# Drop the columns that are not used further:
# 'Area Code', 'Months Code', 'Element Code', 'Element' and 'Unit'.
# drop() returns a new frame, so the name is rebound instead of using
# inplace=True: envtem_df is a filtered slice at this point and mutating it in
# place can raise SettingWithCopyWarning. `columns=` already selects the axis,
# so the redundant `axis=1` is omitted.
envtem_df = envtem_df.drop(columns=['Area Code', 'Months Code', 'Element Code', 'Element', 'Unit'])
envtem_df.head()

In [None]:
# List the distinct values found in the 'Months' column.
envtem_df['Months'].unique()

In [None]:
# Keep only the rows whose 'Months' value is one of the twelve calendar month names,
# then display the remaining distinct values.
calendar_months = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December',
]
in_calendar_months = envtem_df['Months'].isin(calendar_months)
envtem_df = envtem_df[in_calendar_months]
envtem_df['Months'].unique()

In [None]:
# Show the column labels of the dataframe.
envtem_df.columns

In [None]:
# Reshape from wide to long: every column other than 'Area' and 'Months'
# becomes a row, with its label stored in 'years' and its value in 'temperature'.
envtem_df = envtem_df.melt(
    id_vars=['Area', 'Months'],
    var_name='years',
    value_name='temperature',
)
envtem_df.head()

In [None]:
# Remove the first character of every 'years' label (the leading letter Y).
year_labels = envtem_df['years']
envtem_df['years'] = year_labels.str.slice(start=1)
envtem_df.head()

In [None]:
# Cast the three label columns to str; the 'temperature' column is left untouched.
envtem_df = envtem_df.astype({'Area': 'str', 'Months': 'str', 'years': 'str'})

# World temperature

In [None]:
# List the distinct 'Area' values; the sections below select 'World', 'France' and 'Algeria'.
envtem_df['Area'].unique()

In [None]:
# Create a world temperature dataframe
world_df = envtem_df.loc[envtem_df.Area == 'World']

# Group by year and average the monthly values of each year.
# Only the numeric 'temperature' column is aggregated: calling .mean() on the
# whole group also tries to average the string columns ('Area', 'Months'),
# which raises TypeError on pandas >= 2.0.
grp_wolrd_df = world_df.groupby('years', as_index=False)['temperature'].mean()
grp_wolrd_df.head()

In [None]:
# Line chart of the yearly mean for the 'World' area.
# color_discrete_map is only consulted together with `color=`, so the original
# mapping never coloured the trace; color_discrete_sequence makes it red.
# The title names the plotted quantity, which is the 'Temperature change' element.
fig = px.line(
    grp_wolrd_df,
    x="years",
    y="temperature",
    title='Temperature change of the world from 1961 to 2019',
    color_discrete_sequence=["red"],
)
fig.show()

In [None]:
# Bar chart of the yearly mean for the 'World' area.
# color_discrete_map is only consulted together with `color=`, so the original
# mapping never coloured the bars; color_discrete_sequence makes them red.
# The title names the plotted quantity, which is the 'Temperature change' element.
fig = px.bar(
    grp_wolrd_df,
    x="years",
    y="temperature",
    title='Temperature change of the world from 1961 to 2019',
    color_discrete_sequence=["red"],
)
fig.show()

In [None]:
# Keep the 'World' rows for the six years that are compared month by month.
world_years_to_compare = ['1961', '1971', '1981', '1991', '2001', '2019']
in_compared_years = world_df['years'].isin(world_years_to_compare)
wor_6119_df = world_df[in_compared_years]
wor_6119_df.head()

In [None]:
# One line per selected year, plotted month by month, for the 'World' area.
# The title is corrected (spelling) and names the plotted quantity, which is
# the 'Temperature change' element.
fig = px.line(
    wor_6119_df,
    x="Months",
    y="temperature",
    color='years',
    title='Comparing temperature change of the world',
)
fig.show()

# France

In [None]:
# Create France temperature dataframe
France_df = envtem_df.loc[envtem_df.Area == 'France']

# Group by year and average the monthly values of each year.
# Only the numeric 'temperature' column is aggregated: calling .mean() on the
# whole group also tries to average the string columns ('Area', 'Months'),
# which raises TypeError on pandas >= 2.0.
grp_Fr_df = France_df.groupby('years', as_index=False)['temperature'].mean()
grp_Fr_df.head()

In [None]:
# Check for missing data: count the null entries in each column of France_df.
# The column-wise sum of the boolean mask replaces the print loop over
# value_counts() and is shown as a single table (0 means nothing is missing).
missing = France_df.isnull()
missing.sum()

In [None]:
# Bar chart of the yearly mean for France.
# color_discrete_map is only consulted together with `color=`, so the original
# mapping never coloured the bars; color_discrete_sequence makes them red.
# The title names the plotted quantity, which is the 'Temperature change' element.
fig = px.bar(
    grp_Fr_df,
    x="years",
    y="temperature",
    title='Temperature change in France from 1961 to 2019',
    color_discrete_sequence=["red"],
)
fig.show()

In [None]:
# Keep the France rows for the years 1961, 1971, 1981, 1991, 2001 and 2019.
france_years_to_compare = ['1961', '1971', '1981', '1991', '2001', '2019']
in_compared_years = France_df['years'].isin(france_years_to_compare)
fr_6119_df = France_df[in_compared_years]
fr_6119_df.head()

In [None]:
# One line per selected year, plotted month by month, for France.
# The title is corrected (spelling) and names the plotted quantity, which is
# the 'Temperature change' element.
fig = px.line(
    fr_6119_df,
    x="Months",
    y="temperature",
    color='years',
    title='Comparing temperature change in France',
)
fig.show()

# Algeria

In [None]:
# Create Algeria temperature dataframe
Algeria_df = envtem_df.loc[envtem_df.Area == 'Algeria']

# Group by year and average the monthly values of each year.
# Only the numeric 'temperature' column is aggregated: calling .mean() on the
# whole group also tries to average the string columns ('Area', 'Months'),
# which raises TypeError on pandas >= 2.0.
grp_alg_df = Algeria_df.groupby('years', as_index=False)['temperature'].mean()
grp_alg_df.head()

In [None]:
# Check for missing data: count the null entries in each column of Algeria_df.
# The column-wise sum of the boolean mask replaces the print loop over
# value_counts() and is shown as a single table (0 means nothing is missing).
missing = Algeria_df.isnull()
missing.sum()

In [None]:
# Bar chart of the yearly mean for Algeria.
# `labels` is keyed by column name, so the original 'x'/'y' keys were ignored;
# the keys now match the plotted columns.
# color_discrete_map is only consulted together with `color=`, so the original
# mapping never coloured the bars; color_discrete_sequence makes them red.
# The title and label name the plotted quantity, the 'Temperature change' element.
fig = px.bar(
    grp_alg_df,
    x="years",
    y="temperature",
    labels={'years': 'Years', 'temperature': 'Temperature change'},
    title='Temperature change in Algeria from 1961 to 2019',
    color_discrete_sequence=["red"],
)
fig.show()

In [None]:
# Keep the Algeria rows for the six years that are compared month by month.
algeria_years_to_compare = ['1961', '1971', '1981', '1991', '2001', '2019']
in_compared_years = Algeria_df['years'].isin(algeria_years_to_compare)
alg_6119_df = Algeria_df[in_compared_years]
alg_6119_df.head()

In [None]:
# One line per selected year, plotted month by month, for Algeria.
# The title is corrected (spelling) and names the plotted quantity, which is
# the 'Temperature change' element.
fig = px.line(
    alg_6119_df,
    x="Months",
    y="temperature",
    color='years',
    title='Comparing temperature change in Algeria',
)
fig.show()