# **Importing Modules**


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import pprint
from sklearn.linear_model import LinearRegression

In [None]:
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

In [None]:
# Mount Google Drive inside the Colab runtime; every file path below lives under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

# **Data Cleaning**

Reading the csv data into pandas dataframe 'data'

In [None]:
# Load the per-country monthly temperature table; 'dt' is parsed into datetimes at read time.
data=pd.read_csv("/content/drive/MyDrive/4th Sem Project/Data/GlobalLandTemperaturesByCountry.csv",parse_dates=['dt'])
data.head(10)

In [None]:
# Dead cell: the alternate-path load below is wrapped in a string literal, so nothing is read;
# the cell only evaluates (and echoes) the text itself.
'''data=pd.read_csv("/content/drive/MyDrive/Datasets/Climate Change/Dataset 1/GlobalLandTemperaturesByCountry.csv",parse_dates=['dt'])
data.head(10)'''

In [None]:
# NOTE: this binds a second name to the SAME DataFrame object, it is not a copy.
# Columns later added to `data` in place (e.g. 'Year') are therefore visible through `temp`,
# while the later rebinding `data=data.dropna(...)` leaves `temp` pointing at the un-dropped frame.
temp=data

In [None]:
# Number of missing values in each column.
temp.isna().sum()

In [None]:
# Number of distinct values in each column.
data.nunique()

Adding another column Year in the data frame for future reference

In [None]:
# Calendar year of each record, taken from the parsed 'dt' timestamps.
data['Year']=data['dt'].dt.year

In [None]:
# Preview the frame with the new 'Year' column.
data.head(5)

Changing the data type of the Country column from object to category

In [None]:
# Store Country as a categorical column.
# Assigning through data['Country'] replaces the column together with its dtype.
# The previous `.loc[:, 'Country'] = <DataFrame>` form may write the values into the
# existing object column on newer pandas and leave the dtype unchanged.
data['Country']=data['Country'].astype('category')

In [None]:
# Column dtypes and non-null counts, to confirm the dtype change.
data.info()

Checking if there are any missing values

In [None]:
# Missing values per column before any rows are dropped.
data.isnull().sum()

Dropping the missing values

In [None]:
# Keep only rows with no missing value in any column (dropna defaults: axis=0, how='any').
data=data.dropna()

In [None]:
# (rows, columns) remaining after dropping incomplete rows.
data.shape

In [None]:
# Distinct values per column after dropping incomplete rows.
data.nunique()

Creating a list of the countries and a list of the years in the dataframe for future reference

In [None]:
# Sorted array of the distinct country names left in `data`.
countries=np.unique(data['Country'])

In [None]:
# Every calendar year from the earliest to the latest one present, inclusive.
years=list(range(data['Year'].min(),data['Year'].max()+1))

Each country has more than 2000 monthly values, so we take the yearly median per country

In [None]:
# Yearly median temperature per country, flattened back to Country / Year / AverageTemperature columns.
summary_df_avgtmp=(
    data.groupby(['Country','Year'])['AverageTemperature']
        .median()
        .reset_index()
)
summary_df_avgtmp.head()

In [None]:
# Second name for the same summary frame (an alias, not a copy).
temp_summary_df=summary_df_avgtmp

Again checking for missing values

In [None]:
# Missing values per column of the yearly summary.
summary_df_avgtmp.isna().sum()

In [None]:
# (rows, columns) of the yearly summary.
summary_df_avgtmp.shape

Creating dataframe of the years per country for which the yearly median is missing

In [None]:
# Country/Year pairs whose yearly median is missing.
yearwise_na=summary_df_avgtmp.loc[summary_df_avgtmp['AverageTemperature'].isna(),['Country','Year']]

In [None]:
# Preview the missing Country/Year pairs.
yearwise_na.head()

Creating another dataframe to see how many values are missing per country in the summary_df_avgtmp pandas dataframe

In [None]:
# Per country, how many yearly medians are missing (sum of the is-missing flags).
median_missing=summary_df_avgtmp[['AverageTemperature']].isna()
countrywisena=median_missing.groupby(summary_df_avgtmp['Country']).sum().reset_index()
countrywisena.head()

Many countries have missing values, and for many of them the data is missing for a stretch of years. So we try to find the first year in which data exists for each country, and we will drop the years before that particular year.

exporting the above dataframes as csv

# **Data Manipulation**

##**Continentwise**


As in the previous section we have seen that there are 242 countries in the data but in reality there are only 194 countries. So I have downloaded a dataset of the actual countries with their population and world share and land area. And then I checked which countries from our working dataset are not in the actualcountries dataset.

In [None]:
# Reference workbook of actual country names, read from Drive.
count_real=pd.read_excel('/content/drive/MyDrive/4th Sem Project/Some_necessary_files/ActualCountriesNames.xlsx')
count_real.head()

In [None]:
# Country-to-continent lookup workbook; the cells below read its 'Country' and 'Continent' columns.
continentwcountry=pd.read_excel('/content/drive/MyDrive/4th Sem Project/Some_necessary_files/Continentwisecountry.xlsx')
continentwcountry.head()

In [None]:
# Countries present in the temperature data but absent from the continent lookup.
# The lookup names go into a set once, rather than rebuilding a list for every country tested.
continent_country_names=set(continentwcountry['Country'])
extra_count1=[country for country in countries if country not in continent_country_names]

pprint.pprint(extra_count1)

In [None]:
# Countries listed in the continent lookup that never occur in the temperature data.
# A set gives plain hash-based membership instead of an element-wise scan of the numpy
# array for every lookup row.
dataset_country_names=set(countries)
extra_count=[country for country in continentwcountry['Country'] if country not in dataset_country_names]
pprint.pprint(extra_count)

In [None]:
# One list of country names per continent, read from the continent lookup table.
countries_asia=continentwcountry.loc[continentwcountry['Continent']=='Asia','Country'].tolist()
countries_africa=continentwcountry.loc[continentwcountry['Continent']=='Africa','Country'].tolist()
countries_samerica=continentwcountry.loc[continentwcountry['Continent']=='South America','Country'].tolist()
countries_namerica=continentwcountry.loc[continentwcountry['Continent']=='North America','Country'].tolist()
countries_antartica=continentwcountry.loc[continentwcountry['Continent']=='Antarctica','Country'].tolist()
countries_europe=continentwcountry.loc[continentwcountry['Continent']=='Europe','Country'].tolist()
countries_oceania=continentwcountry.loc[continentwcountry['Continent']=='Oceania','Country'].tolist()

In [None]:
# Rows of `temp` split by continent via the country lists built above.
data_asia=temp.loc[temp['Country'].isin(countries_asia)]
data_africa=temp.loc[temp['Country'].isin(countries_africa)]
data_samerica=temp.loc[temp['Country'].isin(countries_samerica)]
data_namerica=temp.loc[temp['Country'].isin(countries_namerica)]
data_europe=temp.loc[temp['Country'].isin(countries_europe)]
data_oceania=temp.loc[temp['Country'].isin(countries_oceania)]
data_antarctica=temp.loc[temp['Country'].isin(countries_antartica)]

In [None]:
# Seeded random sample of country names per continent (10 each; 5 for South America and Oceania).
# Each name list is wrapped in a one-column frame (column 0), sampled, then unwrapped to a list.
sample_asia=list(pd.DataFrame(list(np.unique(data_asia['Country']))).sample(10,random_state=2)[0])
sample_africa=list(pd.DataFrame(list(np.unique(data_africa['Country']))).sample(10,random_state=2)[0])
sample_samerica=list(pd.DataFrame(list(np.unique(data_samerica['Country']))).sample(5,random_state=0)[0])
sample_namerica=list(pd.DataFrame(list(np.unique(data_namerica['Country']))).sample(10,random_state=3)[0])
sample_oceania=list(pd.DataFrame(list(np.unique(data_oceania['Country']))).sample(5,random_state=0)[0])
sample_europe=list(pd.DataFrame(list(np.unique(data_europe['Country']))).sample(10,random_state=0)[0])

In [None]:
# Restrict each continent frame to its sampled countries.
sampledata_asia=data_asia.loc[data_asia['Country'].isin(sample_asia)]
sampledata_africa=data_africa.loc[data_africa['Country'].isin(sample_africa)]
sampledata_namerica=data_namerica.loc[data_namerica['Country'].isin(sample_namerica)]
sampledata_samerica=data_samerica.loc[data_samerica['Country'].isin(sample_samerica)]
sampledata_oceania=data_oceania.loc[data_oceania['Country'].isin(sample_oceania)]
sampledata_europe=data_europe.loc[data_europe['Country'].isin(sample_europe)]

###Asia

In [None]:
# Sampled Asian countries, in sample order.
pprint.pprint(sample_asia)

In [None]:
# One frame per sampled Asian country; the names are hard-coded and must match sample_asia.
data_tajikistan=sampledata_asia.loc[sampledata_asia['Country']=='Tajikistan']
data_vietnam=sampledata_asia.loc[sampledata_asia['Country']=='Vietnam']
data_lebanon=sampledata_asia.loc[sampledata_asia['Country']=='Lebanon']
data_afghanistan=sampledata_asia.loc[sampledata_asia['Country']=='Afghanistan']
data_india=sampledata_asia.loc[sampledata_asia['Country']=='India']
data_oman=sampledata_asia.loc[sampledata_asia['Country']=='Oman']
data_israel=sampledata_asia.loc[sampledata_asia['Country']=='Israel']
data_uae=sampledata_asia.loc[sampledata_asia['Country']=='United Arab Emirates']
data_laos=sampledata_asia.loc[sampledata_asia['Country']=='Laos']
data_christmas=sampledata_asia.loc[sampledata_asia['Country']=='Christmas Island']

In [None]:
# Display India's slice.
data_india

In [None]:
# Country name -> that country's frame, for the sampled Asian countries.
slicedata_asia={
    'Tajikistan':data_tajikistan,
    'India':data_india,
    'Vietnam':data_vietnam,
    'Lebanon':data_lebanon,
    'Afghanistan':data_afghanistan,
    'Oman':data_oman,
    'Israel':data_israel,
    'United Arab Emirates':data_uae,
    'Christmas Island':data_christmas,
    'Laos':data_laos,
}

In [None]:
# Pad every sampled Asian country back to 1743-11 so all frames share one monthly
# timeline, then number the rows in 'Encode'.
ini_date=pd.to_datetime(['1743-11-01'])
for country in sample_asia:
  # date of the first row recorded for this country
  df_date=sampledata_asia.loc[sampledata_asia[sampledata_asia['Country']==country].index[0],'dt']

  # whole months between 1743-11-01 and that first row
  # (ini_date is a one-element DatetimeIndex, hence the trailing [0])
  diff=((df_date.year-ini_date.year)*12+(df_date.month-ini_date.month))[0]
  print(slicedata_asia[country].shape)

  # placeholder rows, one per missing month, with every measurement left as NaN
  new_df=pd.DataFrame(pd.Series(pd.date_range(start='1743-11-1',periods=diff,freq='MS')))
  new_df.rename(columns={0:'dt'},inplace=True)
  new_df['AverageTemperature']=np.nan
  new_df['AverageTemperatureUncertainty']=np.nan
  new_df['Country']=country
  new_df['Year']=np.nan
  new_df['Encode']=np.nan

  # DataFrame.append was removed in pandas 2.0; pd.concat stacks the same rows
  new_df=pd.concat([new_df,slicedata_asia[country]],ignore_index=True)
  # Row number doubles as the month index. It is sized from the frame instead of a fixed
  # 3239 so a frame of any other length does not raise a length-mismatch error.
  # NOTE(review): assumes one row per month with no gaps - confirm against the data.
  new_df['Encode']=range(len(new_df))
  slicedata_asia[country]=new_df

  print(country+' '+str(diff))
  print(slicedata_asia[country].shape)

In [None]:
# Inspect one padded frame.
slicedata_asia['Oman']

###Africa

In [None]:
# Sampled African countries, in sample order.
pprint.pprint(sample_africa)

In [None]:
# One frame per sampled African country; the names are hard-coded and must match sample_africa.
data_Mozambique=sampledata_africa.loc[sampledata_africa['Country']=='Mozambique']
data_Togo=sampledata_africa.loc[sampledata_africa['Country']=='Togo']
data_WesternSahara=sampledata_africa.loc[sampledata_africa['Country']=='Western Sahara']
data_Mauritius=sampledata_africa.loc[sampledata_africa['Country']=='Mauritius']
data_Tunisia=sampledata_africa.loc[sampledata_africa['Country']=='Tunisia']
data_Libya=sampledata_africa.loc[sampledata_africa['Country']=='Libya']
data_Algeria=sampledata_africa.loc[sampledata_africa['Country']=='Algeria']
data_Malawi=sampledata_africa.loc[sampledata_africa['Country']=='Malawi']
data_Angola=sampledata_africa.loc[sampledata_africa['Country']=='Angola']
data_Senegal=sampledata_africa.loc[sampledata_africa['Country']=='Senegal']

In [None]:
# Country name -> that country's frame, for the sampled African countries.
slicedata_africa={
    'Mozambique':data_Mozambique,
    'Togo':data_Togo,
    'Western Sahara':data_WesternSahara,
    'Mauritius':data_Mauritius,
    'Tunisia':data_Tunisia,
    'Libya':data_Libya,
    'Algeria':data_Algeria,
    'Malawi':data_Malawi,
    'Angola':data_Angola,
    'Senegal':data_Senegal,
}

In [None]:
# Pad every sampled African country back to 1743-11 so all frames share one monthly
# timeline, then number the rows in 'Encode'.
ini_date=pd.to_datetime(['1743-11-01'])
for country in sample_africa:
  # date of the first row recorded for this country
  df_date=sampledata_africa.loc[sampledata_africa[sampledata_africa['Country']==country].index[0],'dt']

  # whole months between 1743-11-01 and that first row
  # (ini_date is a one-element DatetimeIndex, hence the trailing [0])
  diff=((df_date.year-ini_date.year)*12+(df_date.month-ini_date.month))[0]
  print(slicedata_africa[country].shape)

  # placeholder rows, one per missing month, with every measurement left as NaN
  new_df=pd.DataFrame(pd.Series(pd.date_range(start='1743-11-1',periods=diff,freq='MS')))
  new_df.rename(columns={0:'dt'},inplace=True)
  new_df['AverageTemperature']=np.nan
  new_df['AverageTemperatureUncertainty']=np.nan
  new_df['Country']=country
  new_df['Year']=np.nan
  new_df['Encode']=np.nan

  # DataFrame.append was removed in pandas 2.0; pd.concat stacks the same rows
  new_df=pd.concat([new_df,slicedata_africa[country]],ignore_index=True)
  # Row number doubles as the month index. It is sized from the frame instead of a fixed
  # 3239 so a frame of any other length does not raise a length-mismatch error.
  # NOTE(review): assumes one row per month with no gaps - confirm against the data.
  new_df['Encode']=range(len(new_df))
  slicedata_africa[country]=new_df

  print(country+' '+str(diff))
  print(slicedata_africa[country].shape)

In [None]:
# Inspect one padded frame.
slicedata_africa['Algeria']

###North America

In [None]:
# Sampled North American countries, in sample order.
pprint.pprint(sample_namerica)

In [None]:
# One frame per sampled North American country; the names are hard-coded and must match sample_namerica.
data_PuertoRico=sampledata_namerica.loc[sampledata_namerica['Country']=='Puerto Rico']
data_UnitedStates=sampledata_namerica.loc[sampledata_namerica['Country']=='United States']
data_Montserrat=sampledata_namerica.loc[sampledata_namerica['Country']=='Montserrat']
data_Mexico=sampledata_namerica.loc[sampledata_namerica['Country']=='Mexico']
data_Belize=sampledata_namerica.loc[sampledata_namerica['Country']=='Belize']
data_Canada=sampledata_namerica.loc[sampledata_namerica['Country']=='Canada']
data_BakerIsland=sampledata_namerica.loc[sampledata_namerica['Country']=='Baker Island']
data_SaintBarthélemy=sampledata_namerica.loc[sampledata_namerica['Country']=='Saint Barthélemy']
data_Trinidad=sampledata_namerica.loc[sampledata_namerica['Country']=='Trinidad And Tobago']
data_ElSalvador=sampledata_namerica.loc[sampledata_namerica['Country']=='El Salvador']

In [None]:
# Country name -> that country's frame, for the sampled North American countries.
slicedata_namerica={
    'Puerto Rico':data_PuertoRico,
    'United States':data_UnitedStates,
    'Montserrat':data_Montserrat,
    'Mexico':data_Mexico,
    'Belize':data_Belize,
    'Canada':data_Canada,
    'Baker Island':data_BakerIsland,
    'Saint Barthélemy':data_SaintBarthélemy,
    'Trinidad And Tobago':data_Trinidad,
    'El Salvador':data_ElSalvador,
}

In [None]:
# Pad every sampled North American country back to 1743-11 so all frames share one
# monthly timeline, then number the rows in 'Encode'.
ini_date=pd.to_datetime(['1743-11-01'])
for country in sample_namerica:
  # date of the first row recorded for this country
  df_date=sampledata_namerica.loc[sampledata_namerica[sampledata_namerica['Country']==country].index[0],'dt']

  # whole months between 1743-11-01 and that first row
  # (ini_date is a one-element DatetimeIndex, hence the trailing [0])
  diff=((df_date.year-ini_date.year)*12+(df_date.month-ini_date.month))[0]
  print(slicedata_namerica[country].shape)

  # placeholder rows, one per missing month, with every measurement left as NaN
  new_df=pd.DataFrame(pd.Series(pd.date_range(start='1743-11-1',periods=diff,freq='MS')))
  new_df.rename(columns={0:'dt'},inplace=True)
  new_df['AverageTemperature']=np.nan
  new_df['AverageTemperatureUncertainty']=np.nan
  new_df['Country']=country
  new_df['Year']=np.nan
  new_df['Encode']=np.nan

  # DataFrame.append was removed in pandas 2.0; pd.concat stacks the same rows
  new_df=pd.concat([new_df,slicedata_namerica[country]],ignore_index=True)
  # Row number doubles as the month index. It is sized from the frame instead of a fixed
  # 3239 so a frame of any other length does not raise a length-mismatch error.
  # NOTE(review): assumes one row per month with no gaps - confirm against the data.
  new_df['Encode']=range(len(new_df))
  slicedata_namerica[country]=new_df

  print(country+' '+str(diff))
  print(slicedata_namerica[country].shape)

In [None]:
# Inspect one padded frame.
slicedata_namerica['El Salvador']

###Europe

In [None]:
# Sampled European countries, in sample order.
pprint.pprint(sample_europe)

In [None]:
# One frame per sampled European country; the names are hard-coded and must match sample_europe.
data_Slovakia=sampledata_europe.loc[sampledata_europe['Country']=='Slovakia']
data_Moldova=sampledata_europe.loc[sampledata_europe['Country']=='Moldova']
data_Portugal=sampledata_europe.loc[sampledata_europe['Country']=='Portugal']
data_Monaco=sampledata_europe.loc[sampledata_europe['Country']=='Monaco']
data_Estonia=sampledata_europe.loc[sampledata_europe['Country']=='Estonia']
data_Austria=sampledata_europe.loc[sampledata_europe['Country']=='Austria']
data_Malta=sampledata_europe.loc[sampledata_europe['Country']=='Malta']
data_SanMarino=sampledata_europe.loc[sampledata_europe['Country']=='San Marino']
data_Luxembourg=sampledata_europe.loc[sampledata_europe['Country']=='Luxembourg']
data_Latvia=sampledata_europe.loc[sampledata_europe['Country']=='Latvia']

In [None]:
# Country name -> that country's frame, for the sampled European countries.
slicedata_europe={
    'Slovakia':data_Slovakia,
    'Moldova':data_Moldova,
    'Portugal':data_Portugal,
    'Monaco':data_Monaco,
    'Estonia':data_Estonia,
    'Austria':data_Austria,
    'Malta':data_Malta,
    'San Marino':data_SanMarino,
    'Luxembourg':data_Luxembourg,
    'Latvia':data_Latvia,
}

In [None]:
# Pad every sampled European country back to 1743-11 so all frames share one monthly
# timeline, then number the rows in 'Encode'.
ini_date=pd.to_datetime(['1743-11-01'])
for country in sample_europe:
  # date of the first row recorded for this country
  df_date=sampledata_europe.loc[sampledata_europe[sampledata_europe['Country']==country].index[0],'dt']

  # whole months between 1743-11-01 and that first row
  # (ini_date is a one-element DatetimeIndex, hence the trailing [0])
  diff=((df_date.year-ini_date.year)*12+(df_date.month-ini_date.month))[0]
  print(slicedata_europe[country].shape)

  # placeholder rows, one per missing month, with every measurement left as NaN
  new_df=pd.DataFrame(pd.Series(pd.date_range(start='1743-11-1',periods=diff,freq='MS')))
  new_df.rename(columns={0:'dt'},inplace=True)
  new_df['AverageTemperature']=np.nan
  new_df['AverageTemperatureUncertainty']=np.nan
  new_df['Country']=country
  new_df['Year']=np.nan
  new_df['Encode']=np.nan

  # DataFrame.append was removed in pandas 2.0; pd.concat stacks the same rows
  new_df=pd.concat([new_df,slicedata_europe[country]],ignore_index=True)
  # Row number doubles as the month index. It is sized from the frame instead of a fixed
  # 3239 so a frame of any other length does not raise a length-mismatch error.
  # NOTE(review): assumes one row per month with no gaps - confirm against the data.
  new_df['Encode']=range(len(new_df))
  slicedata_europe[country]=new_df

  print(country+' '+str(diff))
  print(slicedata_europe[country].shape)

In [None]:
# Inspect one padded frame.
slicedata_europe['Portugal']

###South America

In [None]:
# Sampled South American countries, in sample order.
pprint.pprint(sample_samerica)

In [None]:
# One frame per sampled South American country; the names are hard-coded and must match sample_samerica.
data_Bolivia=sampledata_samerica.loc[sampledata_samerica['Country']=='Bolivia']
data_FalklandIslands=sampledata_samerica.loc[sampledata_samerica['Country']=='Falkland Islands (Islas Malvinas)']
data_guyana=sampledata_samerica.loc[sampledata_samerica['Country']=='Guyana']
data_paraguay=sampledata_samerica.loc[sampledata_samerica['Country']=='Paraguay']
data_Venezuela=sampledata_samerica.loc[sampledata_samerica['Country']=='Venezuela']

In [None]:
# Country name -> that country's frame, for the sampled South American countries.
slicedata_samerica={
    'Bolivia':data_Bolivia,
    'Falkland Islands (Islas Malvinas)':data_FalklandIslands,
    'Guyana':data_guyana,
    'Paraguay':data_paraguay,
    'Venezuela':data_Venezuela,
}

In [None]:
# Pad every sampled South American country back to 1743-11 so all frames share one
# monthly timeline, then number the rows in 'Encode'.
ini_date=pd.to_datetime(['1743-11-01'])
for country in sample_samerica:
  # date of the first row recorded for this country
  df_date=sampledata_samerica.loc[sampledata_samerica[sampledata_samerica['Country']==country].index[0],'dt']

  # whole months between 1743-11-01 and that first row
  # (ini_date is a one-element DatetimeIndex, hence the trailing [0])
  diff=((df_date.year-ini_date.year)*12+(df_date.month-ini_date.month))[0]
  print(slicedata_samerica[country].shape)

  # placeholder rows, one per missing month, with every measurement left as NaN
  new_df=pd.DataFrame(pd.Series(pd.date_range(start='1743-11-1',periods=diff,freq='MS')))
  new_df.rename(columns={0:'dt'},inplace=True)
  new_df['AverageTemperature']=np.nan
  new_df['AverageTemperatureUncertainty']=np.nan
  new_df['Country']=country
  new_df['Year']=np.nan
  new_df['Encode']=np.nan

  # DataFrame.append was removed in pandas 2.0; pd.concat stacks the same rows
  new_df=pd.concat([new_df,slicedata_samerica[country]],ignore_index=True)
  # Row number doubles as the month index. It is sized from the frame instead of a fixed
  # 3239 so a frame of any other length does not raise a length-mismatch error.
  # NOTE(review): assumes one row per month with no gaps - confirm against the data.
  new_df['Encode']=range(len(new_df))
  slicedata_samerica[country]=new_df

  print(country+' '+str(diff))
  print(slicedata_samerica[country].shape)

###Oceania

In [None]:
# Sampled Oceanian countries, in sample order.
pprint.pprint(sample_oceania)

In [None]:
# One frame per sampled Oceanian country; the names are hard-coded and must match sample_oceania.
data_australia=sampledata_oceania.loc[sampledata_oceania['Country']=='Australia']
data_kingman=sampledata_oceania.loc[sampledata_oceania['Country']=='Kingman Reef']
data_caledonia=sampledata_oceania.loc[sampledata_oceania['Country']=='New Caledonia']
data_newzealand=sampledata_oceania.loc[sampledata_oceania['Country']=='New Zealand']
data_papanew=sampledata_oceania.loc[sampledata_oceania['Country']=='Papua New Guinea']

In [None]:
# Country name -> that country's frame, for the sampled Oceanian countries.
slicedata_oceania={
    'Australia':data_australia,
    'Kingman Reef':data_kingman,
    'New Caledonia':data_caledonia,
    'New Zealand':data_newzealand,
    'Papua New Guinea':data_papanew,
}

In [None]:
# Pad every sampled Oceanian country back to 1743-11 so all frames share one monthly
# timeline, then number the rows in 'Encode'.
ini_date=pd.to_datetime(['1743-11-01'])
for country in sample_oceania:
  # date of the first row recorded for this country
  df_date=sampledata_oceania.loc[sampledata_oceania[sampledata_oceania['Country']==country].index[0],'dt']

  # whole months between 1743-11-01 and that first row
  # (ini_date is a one-element DatetimeIndex, hence the trailing [0])
  diff=((df_date.year-ini_date.year)*12+(df_date.month-ini_date.month))[0]
  print(slicedata_oceania[country].shape)

  # placeholder rows, one per missing month, with every measurement left as NaN
  new_df=pd.DataFrame(pd.Series(pd.date_range(start='1743-11-1',periods=diff,freq='MS')))
  new_df.rename(columns={0:'dt'},inplace=True)
  new_df['AverageTemperature']=np.nan
  new_df['AverageTemperatureUncertainty']=np.nan
  new_df['Country']=country
  new_df['Year']=np.nan
  new_df['Encode']=np.nan

  # DataFrame.append was removed in pandas 2.0; pd.concat stacks the same rows
  new_df=pd.concat([new_df,slicedata_oceania[country]],ignore_index=True)
  # Row number doubles as the month index. It is sized from the frame instead of a fixed
  # 3239 so a frame of any other length does not raise a length-mismatch error.
  # NOTE(review): assumes one row per month with no gaps - confirm against the data.
  new_df['Encode']=range(len(new_df))
  slicedata_oceania[country]=new_df

  print(country+' '+str(diff))
  print(slicedata_oceania[country].shape)

###Antarctica

In [None]:
# Distinct country names present in the Antarctica frame (no sampling is done here).
pprint.pprint(np.unique(data_antarctica['Country']))

In [None]:
# One frame per Antarctic entry, selected by hard-coded name.
data_antar=data_antarctica.loc[data_antarctica['Country']=='Antarctica']
data_frenchsouthern=data_antarctica.loc[data_antarctica['Country']=='French Southern And Antarctic Lands']
data_heard=data_antarctica.loc[data_antarctica['Country']=='Heard Island And Mcdonald Islands']

In [None]:
# Entry name -> that entry's frame, for the Antarctic group.
slicedata_antarctica={
    'Antarctica':data_antar,
    'French Southern And Antarctic Lands':data_frenchsouthern,
    'Heard Island And Mcdonald Islands':data_heard,
}

In [None]:
# Pad every Antarctic entry back to 1743-11 so all frames share one monthly timeline,
# then number the rows in 'Encode'.
ini_date=pd.to_datetime(['1743-11-01'])
# iterate over a snapshot of the keys because the dict's values are replaced inside the loop
for country in list(slicedata_antarctica):
  # date of the first row recorded for this entry
  df_date=data_antarctica.loc[data_antarctica[data_antarctica['Country']==country].index[0],'dt']

  # whole months between 1743-11-01 and that first row
  # (ini_date is a one-element DatetimeIndex, hence the trailing [0])
  diff=((df_date.year-ini_date.year)*12+(df_date.month-ini_date.month))[0]
  print(slicedata_antarctica[country].shape)

  # placeholder rows, one per missing month, with every measurement left as NaN
  new_df=pd.DataFrame(pd.Series(pd.date_range(start='1743-11-1',periods=diff,freq='MS')))
  new_df.rename(columns={0:'dt'},inplace=True)
  new_df['AverageTemperature']=np.nan
  new_df['AverageTemperatureUncertainty']=np.nan
  new_df['Country']=country
  new_df['Year']=np.nan
  new_df['Encode']=np.nan

  # DataFrame.append was removed in pandas 2.0; pd.concat stacks the same rows
  new_df=pd.concat([new_df,slicedata_antarctica[country]],ignore_index=True)
  # Row number doubles as the month index. It is sized from the frame instead of a fixed
  # 3239 so a frame of any other length does not raise a length-mismatch error.
  # NOTE(review): assumes one row per month with no gaps - confirm against the data.
  new_df['Encode']=range(len(new_df))
  slicedata_antarctica[country]=new_df

  print(country+' '+str(diff))
  print(slicedata_antarctica[country].shape)

In [None]:
# Inspect one padded frame.
slicedata_antarctica['French Southern And Antarctic Lands']

##**Geographical Region Wise**

# **Estimation of the missing values for the random sample**

Note
1. Asia 1-10
2. Africa 11-20
3. Namerica 21-30
4. Europe 31-40
5. Samerica 41-45
6. Oceania 46-50
7. Antarctica 51,52



##**Asia**


In [None]:
# Sampled Asian countries again, as a reminder of the sample order.
pprint.pprint(sample_asia)

In [None]:
# Per country, only the rows with no NaN in any column - the observations the regressions are fitted on.
slicedata_asia_nona={country:slicedata_asia[country].dropna() for country in sample_asia}

###Tajikistan

Reg1

In [None]:
# reg1: straight-line fit of Tajikistan's temperature against the month index
x1=slicedata_asia_nona['Tajikistan'][['Encode']].to_numpy()
y1=slicedata_asia_nona['Tajikistan']['AverageTemperature'].to_numpy()
reg1=LinearRegression()
reg1.fit(X=x1,y=y1)

In [None]:
# Observed points (faint red) with reg1's fitted line (blue)
fig,ax=plt.subplots()
ax.scatter(x1,y1,color='red',alpha=0.05)
ax.plot(x1,reg1.predict(x1),color='blue')
plt.show()

###India
Reg2

In [None]:
# reg2: straight-line fit of India's temperature against the month index
x2=slicedata_asia_nona['India'][['Encode']].to_numpy()
y2=slicedata_asia_nona['India']['AverageTemperature'].to_numpy()
reg2=LinearRegression()
reg2.fit(X=x2,y=y2)

In [None]:
# Observed points (faint red) with reg2's fitted line (blue)
fig,ax=plt.subplots()
ax.scatter(x2,y2,color='red',alpha=0.05)
ax.plot(x2,reg2.predict(x2),color='blue')
plt.show()

###Vietnam

Reg3

In [None]:
# reg3: straight-line fit of Vietnam's temperature against the month index
x3=slicedata_asia_nona['Vietnam'][['Encode']].to_numpy()
y3=slicedata_asia_nona['Vietnam']['AverageTemperature'].to_numpy()
reg3=LinearRegression()
reg3.fit(X=x3,y=y3)

In [None]:
# Observed points (faint red) with reg3's fitted line (blue)
fig,ax=plt.subplots()
ax.scatter(x3,y3,color='red',alpha=0.05)
ax.plot(x3,reg3.predict(x3),color='blue')
plt.show()

###Lebanon
Reg 4

In [None]:
# reg4: straight-line fit of Lebanon's temperature against the month index
x4=slicedata_asia_nona['Lebanon'][['Encode']].to_numpy()
y4=slicedata_asia_nona['Lebanon']['AverageTemperature'].to_numpy()
reg4=LinearRegression()
reg4.fit(X=x4,y=y4)

In [None]:
# Observed points (faint red) with reg4's fitted line (blue)
fig,ax=plt.subplots()
ax.scatter(x4,y4,color='red',alpha=0.05)
ax.plot(x4,reg4.predict(x4),color='blue')
plt.show()

###Afghanistan

Reg 5

In [None]:
# reg5: straight-line fit of Afghanistan's temperature against the month index
x5=slicedata_asia_nona['Afghanistan'][['Encode']].to_numpy()
y5=slicedata_asia_nona['Afghanistan']['AverageTemperature'].to_numpy()
reg5=LinearRegression()
reg5.fit(X=x5,y=y5)

In [None]:
# Observed points (faint red) with reg5's fitted line (blue)
fig,ax=plt.subplots()
ax.scatter(x5,y5,color='red',alpha=0.05)
ax.plot(x5,reg5.predict(x5),color='blue')
plt.show()

###**Oman**
Reg 6

In [None]:
# reg6: straight-line fit of Oman's temperature against the month index
x6=slicedata_asia_nona['Oman'][['Encode']].to_numpy()
y6=slicedata_asia_nona['Oman']['AverageTemperature'].to_numpy()
reg6=LinearRegression()
reg6.fit(X=x6,y=y6)

In [None]:
# Observed points (faint red) with reg6's fitted line (blue)
fig,ax=plt.subplots()
ax.scatter(x6,y6,color='red',alpha=0.05)
ax.plot(x6,reg6.predict(x6),color='blue')
plt.show()

###Christmas Island

Reg 7 

In [None]:
# reg7: straight-line fit of Christmas Island's temperature against the month index
x7=slicedata_asia_nona['Christmas Island'][['Encode']].to_numpy()
y7=slicedata_asia_nona['Christmas Island']['AverageTemperature'].to_numpy()
reg7=LinearRegression()
reg7.fit(X=x7,y=y7)

In [None]:
# Observed points (faint red) with reg7's fitted line (blue)
fig,ax=plt.subplots()
ax.scatter(x7,y7,color='red',alpha=0.05)
ax.plot(x7,reg7.predict(x7),color='blue')
plt.show()

###Laos
Reg 8

In [None]:
# reg8: straight-line fit of Laos's temperature against the month index
x8=slicedata_asia_nona['Laos'][['Encode']].to_numpy()
y8=slicedata_asia_nona['Laos']['AverageTemperature'].to_numpy()
reg8=LinearRegression()
reg8.fit(X=x8,y=y8)

In [None]:
# Observed points (faint red) with reg8's fitted line (blue)
fig,ax=plt.subplots()
ax.scatter(x8,y8,color='red',alpha=0.05)
ax.plot(x8,reg8.predict(x8),color='blue')
plt.show()

###Israel
Reg9

In [None]:
# reg9: straight-line fit of Israel's temperature against the month index
x9=slicedata_asia_nona['Israel'][['Encode']].to_numpy()
y9=slicedata_asia_nona['Israel']['AverageTemperature'].to_numpy()
reg9=LinearRegression()
reg9.fit(X=x9,y=y9)

In [None]:
# Observed points (faint red) with reg9's fitted line (blue)
fig,ax=plt.subplots()
ax.scatter(x9,y9,color='red',alpha=0.05)
ax.plot(x9,reg9.predict(x9),color='blue')
plt.show()

###United Arab Emirates
Reg 10

In [None]:
# reg10: straight-line fit of the United Arab Emirates' temperature against the month index
x10=slicedata_asia_nona['United Arab Emirates'][['Encode']].to_numpy()
y10=slicedata_asia_nona['United Arab Emirates']['AverageTemperature'].to_numpy()
reg10=LinearRegression()
reg10.fit(X=x10,y=y10)

In [None]:
# Observed points (faint red) with reg10's fitted line (blue)
fig,ax=plt.subplots()
ax.scatter(x10,y10,color='red',alpha=0.05)
ax.plot(x10,reg10.predict(x10),color='blue')
plt.show()

### **Filling the NaN values of Asia** 

In [None]:
reg_list=[reg1,reg2,reg3,reg4,reg5,reg6,reg7,reg8,reg9,reg10]

# Tie each model to the country it was fitted on (the order of the Reg 1-10 sections).
# Zipping reg_list against sample_asia paired them by position, which is only right if the
# random sample order happens to match the order of the regression sections.
reg_by_country_asia=dict(zip(
    ['Tajikistan','India','Vietnam','Lebanon','Afghanistan','Oman',
     'Christmas Island','Laos','Israel','United Arab Emirates'],
    reg_list))

for country in sample_asia:
  reg=reg_by_country_asia[country]   # KeyError here means the sample and the fitted models disagree
  frame=slicedata_asia[country]      # same object as the dict entry, so it is updated in place
  # rows whose month index is absent from the NaN-free frame are the ones to estimate
  missing=~frame['Encode'].isin(slicedata_asia_nona[country]['Encode'])
  if missing.any():
    # one vectorised prediction instead of rebuilding a list and predicting row by row
    frame.loc[missing,'AverageTemperature']=reg.predict(frame.loc[missing,['Encode']].to_numpy())

## **Africa**

In [None]:
# Sampled African countries again, as a reminder of the sample order.
pprint.pprint(sample_africa)

In [None]:
# Per country, only the rows with no NaN in any column - the observations the regressions are fitted on.
slicedata_africa_nona={country:slicedata_africa[country].dropna() for country in sample_africa}

###Mozambique

In [None]:
# reg11: straight-line fit of Mozambique's temperature against the month index
x11=slicedata_africa_nona['Mozambique'][['Encode']].to_numpy()
y11=slicedata_africa_nona['Mozambique']['AverageTemperature'].to_numpy()
reg11=LinearRegression()
reg11.fit(X=x11,y=y11)

In [None]:
# Observed points (faint red) with reg11's fitted line (blue)
fig,ax=plt.subplots()
ax.scatter(x11,y11,color='red',alpha=0.05)
ax.plot(x11,reg11.predict(x11),color='blue')
plt.show()

###Togo

In [None]:
# reg12: straight-line fit of Togo's temperature against the month index
x12=slicedata_africa_nona['Togo'][['Encode']].to_numpy()
y12=slicedata_africa_nona['Togo']['AverageTemperature'].to_numpy()
reg12=LinearRegression()
reg12.fit(X=x12,y=y12)

In [None]:
# Observed points (faint red) with reg12's fitted line (blue)
fig,ax=plt.subplots()
ax.scatter(x12,y12,color='red',alpha=0.05)
ax.plot(x12,reg12.predict(x12),color='blue')
plt.show()

###Western Sahara

In [None]:
# reg13: straight-line fit of Western Sahara's temperature against the month index
x13=slicedata_africa_nona['Western Sahara'][['Encode']].to_numpy()
y13=slicedata_africa_nona['Western Sahara']['AverageTemperature'].to_numpy()
reg13=LinearRegression()
reg13.fit(X=x13,y=y13)

In [None]:
# Observed points (faint red) with reg13's fitted line (blue)
fig,ax=plt.subplots()
ax.scatter(x13,y13,color='red',alpha=0.05)
ax.plot(x13,reg13.predict(x13),color='blue')
plt.show()

###Mauritius

In [None]:
# reg14: straight-line fit of Mauritius's temperature against the month index
x14=slicedata_africa_nona['Mauritius'][['Encode']].to_numpy()
y14=slicedata_africa_nona['Mauritius']['AverageTemperature'].to_numpy()
reg14=LinearRegression()
reg14.fit(X=x14,y=y14)

In [None]:
# Observed points (faint red) with reg14's fitted line (blue)
fig,ax=plt.subplots()
ax.scatter(x14,y14,color='red',alpha=0.05)
ax.plot(x14,reg14.predict(x14),color='blue')
plt.show()

###Tunisia

In [None]:
# reg15: straight-line fit of Tunisia's temperature against the month index
x15=slicedata_africa_nona['Tunisia'][['Encode']].to_numpy()
y15=slicedata_africa_nona['Tunisia']['AverageTemperature'].to_numpy()
reg15=LinearRegression()
reg15.fit(X=x15,y=y15)

In [None]:
# Observed points (faint red) with reg15's fitted line (blue)
fig,ax=plt.subplots()
ax.scatter(x15,y15,color='red',alpha=0.05)
ax.plot(x15,reg15.predict(x15),color='blue')
plt.show()

###Libya

In [None]:
# reg16: straight-line fit of Libya's temperature against the month index
x16=slicedata_africa_nona['Libya'][['Encode']].to_numpy()
y16=slicedata_africa_nona['Libya']['AverageTemperature'].to_numpy()
reg16=LinearRegression()
reg16.fit(X=x16,y=y16)

In [None]:
# Observed points (faint red) with reg16's fitted line (blue)
fig,ax=plt.subplots()
ax.scatter(x16,y16,color='red',alpha=0.05)
ax.plot(x16,reg16.predict(x16),color='blue')
plt.show()

###Algeria

In [None]:
# reg17: straight-line fit of Algeria's temperature against the month index
x17=slicedata_africa_nona['Algeria'][['Encode']].to_numpy()
y17=slicedata_africa_nona['Algeria']['AverageTemperature'].to_numpy()
reg17=LinearRegression()
reg17.fit(X=x17,y=y17)

In [None]:
# Observed points (faint red) with reg17's fitted line (blue)
fig,ax=plt.subplots()
ax.scatter(x17,y17,color='red',alpha=0.05)
ax.plot(x17,reg17.predict(x17),color='blue')
plt.show()

###Malawi

In [None]:
# reg18: straight-line fit of Malawi's temperature against the month index
x18=slicedata_africa_nona['Malawi'][['Encode']].to_numpy()
y18=slicedata_africa_nona['Malawi']['AverageTemperature'].to_numpy()
reg18=LinearRegression()
reg18.fit(X=x18,y=y18)

In [None]:
# Observed points (faint red) with reg18's fitted line (blue)
fig,ax=plt.subplots()
ax.scatter(x18,y18,color='red',alpha=0.05)
ax.plot(x18,reg18.predict(x18),color='blue')
plt.show()

###Angola

In [None]:
# reg19: straight-line fit of Angola's temperature against the month index
x19=slicedata_africa_nona['Angola'][['Encode']].to_numpy()
y19=slicedata_africa_nona['Angola']['AverageTemperature'].to_numpy()
reg19=LinearRegression()
reg19.fit(X=x19,y=y19)

In [None]:
# Observed points (faint red) with reg19's fitted line (blue)
fig,ax=plt.subplots()
ax.scatter(x19,y19,color='red',alpha=0.05)
ax.plot(x19,reg19.predict(x19),color='blue')
plt.show()

###Senegal

In [None]:
# reg20: straight-line fit of Senegal's temperature against the month index
x20=slicedata_africa_nona['Senegal'][['Encode']].to_numpy()
y20=slicedata_africa_nona['Senegal']['AverageTemperature'].to_numpy()
reg20=LinearRegression()
reg20.fit(X=x20,y=y20)

In [None]:
# Observed points (faint red) with reg20's fitted line (blue)
fig,ax=plt.subplots()
ax.scatter(x20,y20,color='red',alpha=0.05)
ax.plot(x20,reg20.predict(x20),color='blue')
plt.show()

### **Filling the NaN values of Africa** 

In [None]:
reg_list2=[reg11,reg12,reg13,reg14,reg15,reg16,reg17,reg18,reg19,reg20]

# Tie each model to the country it was fitted on (the order of the regression sections).
# Zipping reg_list2 against sample_africa paired them by position, which is only right if
# the random sample order happens to match the order of the regression sections.
reg_by_country_africa=dict(zip(
    ['Mozambique','Togo','Western Sahara','Mauritius','Tunisia',
     'Libya','Algeria','Malawi','Angola','Senegal'],
    reg_list2))

for country in sample_africa:
  reg=reg_by_country_africa[country]   # KeyError here means the sample and the fitted models disagree
  frame=slicedata_africa[country]      # same object as the dict entry, so it is updated in place
  # rows whose month index is absent from the NaN-free frame are the ones to estimate
  missing=~frame['Encode'].isin(slicedata_africa_nona[country]['Encode'])
  if missing.any():
    # one vectorised prediction instead of rebuilding a list and predicting row by row
    frame.loc[missing,'AverageTemperature']=reg.predict(frame.loc[missing,['Encode']].to_numpy())

## **North America**

In [None]:
# Sampled North American countries again, as a reminder of the sample order.
pprint.pprint(sample_namerica)

In [None]:
# Per country, only the rows with no NaN in any column - the observations the regressions are fitted on.
slicedata_namerica_nona={country:slicedata_namerica[country].dropna() for country in sample_namerica}

###Puerto Rico
Reg 21

In [None]:
# reg21: straight-line fit of Puerto Rico's temperature against the month index
x21=slicedata_namerica_nona['Puerto Rico'][['Encode']].to_numpy()
y21=slicedata_namerica_nona['Puerto Rico']['AverageTemperature'].to_numpy()
reg21=LinearRegression()
reg21.fit(X=x21,y=y21)

In [None]:
# Observed points (faint red) with reg21's fitted line (blue)
fig,ax=plt.subplots()
ax.scatter(x21,y21,color='red',alpha=0.05)
ax.plot(x21,reg21.predict(x21),color='blue')
plt.show()

###United States
Reg 22

In [None]:
# reg22: straight-line fit of the United States' temperature against the month index
x22=slicedata_namerica_nona['United States'][['Encode']].to_numpy()
y22=slicedata_namerica_nona['United States']['AverageTemperature'].to_numpy()
reg22=LinearRegression()
reg22.fit(X=x22,y=y22)

In [None]:
# Observed points (faint red) with reg22's fitted line (blue)
fig,ax=plt.subplots()
ax.scatter(x22,y22,color='red',alpha=0.05)
ax.plot(x22,reg22.predict(x22),color='blue')
plt.show()

###Montserrat Reg 23


In [None]:
# reg23: straight-line fit of Montserrat's temperature against the month index
x23=slicedata_namerica_nona['Montserrat'][['Encode']].to_numpy()
y23=slicedata_namerica_nona['Montserrat']['AverageTemperature'].to_numpy()
reg23=LinearRegression()
reg23.fit(X=x23,y=y23)

In [None]:
# Observed points (faint red) with reg23's fitted line (blue)
fig,ax=plt.subplots()
ax.scatter(x23,y23,color='red',alpha=0.05)
ax.plot(x23,reg23.predict(x23),color='blue')
plt.show()

###Mexico 
Reg 24

In [None]:
# reg24: straight-line fit of Mexico's temperature against the month index.
# This cell previously read the 'Montserrat' frame (copied from the reg23 cell), so the
# "Mexico" model was really a second Montserrat fit.
x24=np.array(slicedata_namerica_nona['Mexico']['Encode']).reshape(-1,1)
y24=np.array(slicedata_namerica_nona['Mexico']['AverageTemperature'])
reg24=LinearRegression()
reg24.fit(x24,y24)

In [None]:
# Observed points (faint red) with reg24's fitted line (blue)
fig,ax=plt.subplots()
ax.scatter(x24,y24,color='red',alpha=0.05)
ax.plot(x24,reg24.predict(x24),color='blue')
plt.show()

###Belize Reg 25

In [None]:
# reg25: straight-line fit of Belize's temperature against the month index
x25=slicedata_namerica_nona['Belize'][['Encode']].to_numpy()
y25=slicedata_namerica_nona['Belize']['AverageTemperature'].to_numpy()
reg25=LinearRegression()
reg25.fit(X=x25,y=y25)

In [None]:
# Observed points (faint red) with reg25's fitted line (blue)
fig,ax=plt.subplots()
ax.scatter(x25,y25,color='red',alpha=0.05)
ax.plot(x25,reg25.predict(x25),color='blue')
plt.show()

###Canada Reg 26

In [None]:
# reg26: straight-line fit of Canada's temperature against the month index
x26=slicedata_namerica_nona['Canada'][['Encode']].to_numpy()
y26=slicedata_namerica_nona['Canada']['AverageTemperature'].to_numpy()
reg26=LinearRegression()
reg26.fit(X=x26,y=y26)

In [None]:
# Observed points (faint red) with reg26's fitted line (blue)
fig,ax=plt.subplots()
ax.scatter(x26,y26,color='red',alpha=0.05)
ax.plot(x26,reg26.predict(x26),color='blue')
plt.show()

###Baker Island Reg 27

In [None]:
# reg27: straight-line fit of Baker Island's temperature against the month index
x27=slicedata_namerica_nona['Baker Island'][['Encode']].to_numpy()
y27=slicedata_namerica_nona['Baker Island']['AverageTemperature'].to_numpy()
reg27=LinearRegression()
reg27.fit(X=x27,y=y27)

In [None]:
# Observed points (faint red) with reg27's fitted line (blue)
fig,ax=plt.subplots()
ax.scatter(x27,y27,color='red',alpha=0.05)
ax.plot(x27,reg27.predict(x27),color='blue')
plt.show()

###Saint Barthélemy Reg 28

In [None]:
# reg28: straight-line fit of Saint Barthélemy's temperature against the month index
x28=slicedata_namerica_nona['Saint Barthélemy'][['Encode']].to_numpy()
y28=slicedata_namerica_nona['Saint Barthélemy']['AverageTemperature'].to_numpy()
reg28=LinearRegression()
reg28.fit(X=x28,y=y28)

In [None]:
# Observed points (faint red) with reg28's fitted line (blue)
fig,ax=plt.subplots()
ax.scatter(x28,y28,color='red',alpha=0.05)
ax.plot(x28,reg28.predict(x28),color='blue')
plt.show()

###Trinidad And Tobago Reg 29

In [None]:
# reg29: straight-line fit of Trinidad And Tobago's temperature against the month index
x29=slicedata_namerica_nona['Trinidad And Tobago'][['Encode']].to_numpy()
y29=slicedata_namerica_nona['Trinidad And Tobago']['AverageTemperature'].to_numpy()
reg29=LinearRegression()
reg29.fit(X=x29,y=y29)

In [None]:
# Observed points (faint red) with reg29's fitted line (blue)
fig,ax=plt.subplots()
ax.scatter(x29,y29,color='red',alpha=0.05)
ax.plot(x29,reg29.predict(x29),color='blue')
plt.show()

###El Salvador Reg 30

In [None]:
# reg30: straight-line fit of El Salvador's temperature against the month index
x30=slicedata_namerica_nona['El Salvador'][['Encode']].to_numpy()
y30=slicedata_namerica_nona['El Salvador']['AverageTemperature'].to_numpy()
reg30=LinearRegression()
reg30.fit(X=x30,y=y30)

In [None]:
# Observed points (faint red) with reg30's fitted line (blue)
fig,ax=plt.subplots()
ax.scatter(x30,y30,color='red',alpha=0.05)
ax.plot(x30,reg30.predict(x30),color='blue')
plt.show()

### **Filling the NaN values of North America** 

In [None]:
reg_list3=[reg21,reg22,reg23,reg24,reg25,reg26,reg27,reg28,reg29,reg30]

# Tie each model to the country named in its Reg 21-30 section heading.
# Zipping reg_list3 against sample_namerica paired them by position, which is only right
# if the random sample order happens to match the order of the regression sections.
reg_by_country_namerica=dict(zip(
    ['Puerto Rico','United States','Montserrat','Mexico','Belize',
     'Canada','Baker Island','Saint Barthélemy','Trinidad And Tobago','El Salvador'],
    reg_list3))

for country in sample_namerica:
  reg=reg_by_country_namerica[country]   # KeyError here means the sample and the fitted models disagree
  frame=slicedata_namerica[country]      # same object as the dict entry, so it is updated in place
  # rows whose month index is absent from the NaN-free frame are the ones to estimate
  missing=~frame['Encode'].isin(slicedata_namerica_nona[country]['Encode'])
  if missing.any():
    # one vectorised prediction instead of rebuilding a list and predicting row by row
    frame.loc[missing,'AverageTemperature']=reg.predict(frame.loc[missing,['Encode']].to_numpy())

In [None]:
# Sanity check: remaining NaN temperatures per sampled North American country.
for country in sample_namerica:
  remaining_nan=slicedata_namerica[country]['AverageTemperature'].isna().sum()
  print(remaining_nan)

## **Europe**

In [None]:
# Sampled European countries again, as a reminder of the sample order.
pprint.pprint(sample_europe)

In [None]:
# Per country, only the rows with no NaN in any column - the observations the regressions are fitted on.
slicedata_europe_nona={country:slicedata_europe[country].dropna() for country in sample_europe}

###Slovakia Reg 31

In [None]:
# Slovakia: least-squares fit of AverageTemperature on the 'Encode' column (NaN-free rows only).
x31 = slicedata_europe_nona['Slovakia'][['Encode']].to_numpy(copy=True)  # (n, 1) feature matrix
y31 = slicedata_europe_nona['Slovakia']['AverageTemperature'].to_numpy(copy=True)
reg31 = LinearRegression().fit(x31, y31)
reg31  # display the fitted estimator

In [None]:
# Slovakia: observed temperatures (faint red points) and the fitted trend (blue line).
fig, ax = plt.subplots()
ax.scatter(x31, y31, color='red', alpha=0.05)
ax.plot(x31, reg31.predict(x31), color='blue')
plt.show()

###Moldova Reg 32

In [None]:
# Moldova: least-squares fit of AverageTemperature on the 'Encode' column (NaN-free rows only).
x32 = slicedata_europe_nona['Moldova'][['Encode']].to_numpy(copy=True)  # (n, 1) feature matrix
y32 = slicedata_europe_nona['Moldova']['AverageTemperature'].to_numpy(copy=True)
reg32 = LinearRegression().fit(x32, y32)
reg32  # display the fitted estimator

In [None]:
# Moldova: observed temperatures (faint red points) and the fitted trend (blue line).
plt.scatter(x32, y32, color='red', alpha=0.05)
# The line must be evaluated on Moldova's own inputs (x32). Predicting on Slovakia's
# x31 pairs the x-values with predictions for a different country's rows, and fails
# outright when the two arrays differ in length.
plt.plot(x32, reg32.predict(x32), color='blue')
plt.show()

###Portugal Reg 33

In [None]:
# Portugal: least-squares fit of AverageTemperature on the 'Encode' column (NaN-free rows only).
x33 = slicedata_europe_nona['Portugal'][['Encode']].to_numpy(copy=True)  # (n, 1) feature matrix
y33 = slicedata_europe_nona['Portugal']['AverageTemperature'].to_numpy(copy=True)
reg33 = LinearRegression().fit(x33, y33)
reg33  # display the fitted estimator

In [None]:
# Portugal: observed temperatures (faint red points) and the fitted trend (blue line).
fig, ax = plt.subplots()
ax.scatter(x33, y33, color='red', alpha=0.05)
ax.plot(x33, reg33.predict(x33), color='blue')
plt.show()

###Monaco Reg 34

In [None]:
# Monaco: least-squares fit of AverageTemperature on the 'Encode' column (NaN-free rows only).
x34 = slicedata_europe_nona['Monaco'][['Encode']].to_numpy(copy=True)  # (n, 1) feature matrix
y34 = slicedata_europe_nona['Monaco']['AverageTemperature'].to_numpy(copy=True)
reg34 = LinearRegression().fit(x34, y34)
reg34  # display the fitted estimator

In [None]:
# Monaco: observed temperatures (faint red points) and the fitted trend (blue line).
fig, ax = plt.subplots()
ax.scatter(x34, y34, color='red', alpha=0.05)
ax.plot(x34, reg34.predict(x34), color='blue')
plt.show()

###Estonia Reg 35

In [None]:
# Estonia: least-squares fit of AverageTemperature on the 'Encode' column (NaN-free rows only).
x35 = slicedata_europe_nona['Estonia'][['Encode']].to_numpy(copy=True)  # (n, 1) feature matrix
y35 = slicedata_europe_nona['Estonia']['AverageTemperature'].to_numpy(copy=True)
reg35 = LinearRegression().fit(x35, y35)
reg35  # display the fitted estimator

In [None]:
# Estonia: observed temperatures (faint red points) and the fitted trend (blue line).
fig, ax = plt.subplots()
ax.scatter(x35, y35, color='red', alpha=0.05)
ax.plot(x35, reg35.predict(x35), color='blue')
plt.show()

###Austria Reg 36

In [None]:
# Austria: least-squares fit of AverageTemperature on the 'Encode' column (NaN-free rows only).
x36 = slicedata_europe_nona['Austria'][['Encode']].to_numpy(copy=True)  # (n, 1) feature matrix
y36 = slicedata_europe_nona['Austria']['AverageTemperature'].to_numpy(copy=True)
reg36 = LinearRegression().fit(x36, y36)
reg36  # display the fitted estimator

In [None]:
# Austria: observed temperatures (faint red points) and the fitted trend (blue line).
fig, ax = plt.subplots()
ax.scatter(x36, y36, color='red', alpha=0.05)
ax.plot(x36, reg36.predict(x36), color='blue')
plt.show()

###Malta Reg 37

In [None]:
# Malta: least-squares fit of AverageTemperature on the 'Encode' column (NaN-free rows only).
x37 = slicedata_europe_nona['Malta'][['Encode']].to_numpy(copy=True)  # (n, 1) feature matrix
y37 = slicedata_europe_nona['Malta']['AverageTemperature'].to_numpy(copy=True)
reg37 = LinearRegression().fit(x37, y37)
reg37  # display the fitted estimator

In [None]:
# Malta: observed temperatures (faint red points) and the fitted trend (blue line).
fig, ax = plt.subplots()
ax.scatter(x37, y37, color='red', alpha=0.05)
ax.plot(x37, reg37.predict(x37), color='blue')
plt.show()

###San Marino Reg 38

In [None]:
# San Marino: least-squares fit of AverageTemperature on the 'Encode' column (NaN-free rows only).
x38 = slicedata_europe_nona['San Marino'][['Encode']].to_numpy(copy=True)  # (n, 1) feature matrix
y38 = slicedata_europe_nona['San Marino']['AverageTemperature'].to_numpy(copy=True)
reg38 = LinearRegression().fit(x38, y38)
reg38  # display the fitted estimator

In [None]:
# San Marino: observed temperatures (faint red points) and the fitted trend (blue line).
fig, ax = plt.subplots()
ax.scatter(x38, y38, color='red', alpha=0.05)
ax.plot(x38, reg38.predict(x38), color='blue')
plt.show()

###Luxembourg Reg 39

In [None]:
# Luxembourg: least-squares fit of AverageTemperature on the 'Encode' column (NaN-free rows only).
x39 = slicedata_europe_nona['Luxembourg'][['Encode']].to_numpy(copy=True)  # (n, 1) feature matrix
y39 = slicedata_europe_nona['Luxembourg']['AverageTemperature'].to_numpy(copy=True)
reg39 = LinearRegression().fit(x39, y39)
reg39  # display the fitted estimator

In [None]:
# Luxembourg: observed temperatures (faint red points) and the fitted trend (blue line).
fig, ax = plt.subplots()
ax.scatter(x39, y39, color='red', alpha=0.05)
ax.plot(x39, reg39.predict(x39), color='blue')
plt.show()

###Latvia Reg 40

In [None]:
# Latvia: least-squares fit of AverageTemperature on the 'Encode' column (NaN-free rows only).
x40 = slicedata_europe_nona['Latvia'][['Encode']].to_numpy(copy=True)  # (n, 1) feature matrix
y40 = slicedata_europe_nona['Latvia']['AverageTemperature'].to_numpy(copy=True)
reg40 = LinearRegression().fit(x40, y40)
reg40  # display the fitted estimator

In [None]:
# Latvia: observed temperatures (faint red points) and the fitted trend (blue line).
fig, ax = plt.subplots()
ax.scatter(x40, y40, color='red', alpha=0.05)
ax.plot(x40, reg40.predict(x40), color='blue')
plt.show()

### **Filling the NaN values of Europe** 

In [None]:
# One fitted regressor per sampled European country.
# NOTE(review): zip() pairs them positionally, so sample_europe must list the
# countries in the same order as reg31..reg40 — confirm.
reg_list4 = [reg31, reg32, reg33, reg34, reg35, reg36, reg37, reg38, reg39, reg40]

for country, reg in zip(sample_europe, reg_list4):
    # Rows whose 'Encode' value does not occur in the NaN-free copy are the ones to impute.
    needs_fill = ~slicedata_europe[country]['Encode'].isin(slicedata_europe_nona[country]['Encode'])
    if needs_fill.any():  # predict() raises on an empty feature matrix
        features = slicedata_europe[country].loc[needs_fill, ['Encode']].to_numpy()
        # A single vectorised predict per country replaces one predict call (and one
        # list rebuild + linear scan) per row; each cell now receives a plain scalar.
        slicedata_europe[country].loc[needs_fill, 'AverageTemperature'] = reg.predict(features)

In [None]:
# Sanity check: number of still-missing temperatures per country (expected to be 0 once the fill cell has run).
for country in sample_europe:
    missing_count = slicedata_europe[country]['AverageTemperature'].isna().sum()
    print(missing_count)

## **South America**

In [None]:
# Show the South American sample with the default pretty-printer settings.
pprint.PrettyPrinter().pprint(sample_samerica)

In [None]:
# Per-country copies of the South American sample with every row containing a NaN removed;
# the regressions below are fitted on these.
slicedata_samerica_nona = {
    country: slicedata_samerica[country].dropna(how='any')
    for country in sample_samerica
}

###Bolivia Reg 41

In [None]:
# Bolivia: least-squares fit of AverageTemperature on the 'Encode' column (NaN-free rows only).
x41 = slicedata_samerica_nona['Bolivia'][['Encode']].to_numpy(copy=True)  # (n, 1) feature matrix
y41 = slicedata_samerica_nona['Bolivia']['AverageTemperature'].to_numpy(copy=True)
reg41 = LinearRegression().fit(x41, y41)
reg41  # display the fitted estimator

In [None]:
# Bolivia: observed temperatures (faint red points) and the fitted trend (blue line).
fig, ax = plt.subplots()
ax.scatter(x41, y41, color='red', alpha=0.05)
ax.plot(x41, reg41.predict(x41), color='blue')
plt.show()

###Falkland Islands (Islas Malvinas) Reg 42

In [None]:
# Falkland Islands: least-squares fit of AverageTemperature on the 'Encode' column (NaN-free rows only).
x42 = slicedata_samerica_nona['Falkland Islands (Islas Malvinas)'][['Encode']].to_numpy(copy=True)  # (n, 1) feature matrix
y42 = slicedata_samerica_nona['Falkland Islands (Islas Malvinas)']['AverageTemperature'].to_numpy(copy=True)
reg42 = LinearRegression().fit(x42, y42)
reg42  # display the fitted estimator

In [None]:
# Falkland Islands: observed temperatures (faint red points) and the fitted trend (blue line).
fig, ax = plt.subplots()
ax.scatter(x42, y42, color='red', alpha=0.05)
ax.plot(x42, reg42.predict(x42), color='blue')
plt.show()

###Guyana Reg 43

In [None]:
# Guyana: least-squares fit of AverageTemperature on the 'Encode' column (NaN-free rows only).
x43 = slicedata_samerica_nona['Guyana'][['Encode']].to_numpy(copy=True)  # (n, 1) feature matrix
y43 = slicedata_samerica_nona['Guyana']['AverageTemperature'].to_numpy(copy=True)
reg43 = LinearRegression().fit(x43, y43)
reg43  # display the fitted estimator

In [None]:
# Guyana: observed temperatures (faint red points) and the fitted trend (blue line).
fig, ax = plt.subplots()
ax.scatter(x43, y43, color='red', alpha=0.05)
ax.plot(x43, reg43.predict(x43), color='blue')
plt.show()

###Paraguay Reg 44

In [None]:
# Paraguay: least-squares fit of AverageTemperature on the 'Encode' column (NaN-free rows only).
x44 = slicedata_samerica_nona['Paraguay'][['Encode']].to_numpy(copy=True)  # (n, 1) feature matrix
y44 = slicedata_samerica_nona['Paraguay']['AverageTemperature'].to_numpy(copy=True)
reg44 = LinearRegression().fit(x44, y44)
reg44  # display the fitted estimator

In [None]:
# Paraguay: observed temperatures (faint red points) and the fitted trend (blue line).
fig, ax = plt.subplots()
ax.scatter(x44, y44, color='red', alpha=0.05)
ax.plot(x44, reg44.predict(x44), color='blue')
plt.show()

###Venezuela Reg 45

In [None]:
# Venezuela: least-squares fit of AverageTemperature on the 'Encode' column (NaN-free rows only).
x45 = slicedata_samerica_nona['Venezuela'][['Encode']].to_numpy(copy=True)  # (n, 1) feature matrix
y45 = slicedata_samerica_nona['Venezuela']['AverageTemperature'].to_numpy(copy=True)
reg45 = LinearRegression().fit(x45, y45)
reg45  # display the fitted estimator

In [None]:
# Venezuela: observed temperatures (faint red points) and the fitted trend (blue line).
fig, ax = plt.subplots()
ax.scatter(x45, y45, color='red', alpha=0.05)
ax.plot(x45, reg45.predict(x45), color='blue')
plt.show()

### **Filling the NaN values of South America**

In [None]:
# One fitted regressor per sampled South American country.
# NOTE(review): zip() pairs them positionally, so sample_samerica must list the
# countries in the same order as reg41..reg45 — confirm.
reg_list5 = [reg41, reg42, reg43, reg44, reg45]

for country, reg in zip(sample_samerica, reg_list5):
    # Rows whose 'Encode' value does not occur in the NaN-free copy are the ones to impute.
    needs_fill = ~slicedata_samerica[country]['Encode'].isin(slicedata_samerica_nona[country]['Encode'])
    if needs_fill.any():  # predict() raises on an empty feature matrix
        features = slicedata_samerica[country].loc[needs_fill, ['Encode']].to_numpy()
        # A single vectorised predict per country replaces one predict call (and one
        # list rebuild + linear scan) per row; each cell now receives a plain scalar.
        slicedata_samerica[country].loc[needs_fill, 'AverageTemperature'] = reg.predict(features)

In [None]:
# Sanity check: number of still-missing temperatures per country (expected to be 0 once the fill cell has run).
for country in sample_samerica:
    missing_count = slicedata_samerica[country]['AverageTemperature'].isna().sum()
    print(missing_count)

## **Oceania**

In [None]:
# Show the Oceania sample with the default pretty-printer settings.
pprint.PrettyPrinter().pprint(sample_oceania)

In [None]:
# Per-country copies of the Oceania sample with every row containing a NaN removed;
# the regressions below are fitted on these.
slicedata_oceania_nona = {
    country: slicedata_oceania[country].dropna(how='any')
    for country in sample_oceania
}

###Australia

In [None]:
# Australia: least-squares fit of AverageTemperature on the 'Encode' column (NaN-free rows only).
x46 = slicedata_oceania_nona['Australia'][['Encode']].to_numpy(copy=True)  # (n, 1) feature matrix
y46 = slicedata_oceania_nona['Australia']['AverageTemperature'].to_numpy(copy=True)
reg46 = LinearRegression().fit(x46, y46)
reg46  # display the fitted estimator

In [None]:
# Australia: observed temperatures (faint red points) and the fitted trend (blue line).
fig, ax = plt.subplots()
ax.scatter(x46, y46, color='red', alpha=0.05)
ax.plot(x46, reg46.predict(x46), color='blue')
plt.show()

###Kingman Reef

In [None]:
# Kingman Reef: least-squares fit of AverageTemperature on the 'Encode' column (NaN-free rows only).
x47 = slicedata_oceania_nona['Kingman Reef'][['Encode']].to_numpy(copy=True)  # (n, 1) feature matrix
y47 = slicedata_oceania_nona['Kingman Reef']['AverageTemperature'].to_numpy(copy=True)
reg47 = LinearRegression().fit(x47, y47)
reg47  # display the fitted estimator

In [None]:
# Kingman Reef: observed temperatures (faint red points) and the fitted trend (blue line).
fig, ax = plt.subplots()
ax.scatter(x47, y47, color='red', alpha=0.05)
ax.plot(x47, reg47.predict(x47), color='blue')
plt.show()

###New Caledonia

In [None]:
# New Caledonia: least-squares fit of AverageTemperature on the 'Encode' column (NaN-free rows only).
x48 = slicedata_oceania_nona['New Caledonia'][['Encode']].to_numpy(copy=True)  # (n, 1) feature matrix
y48 = slicedata_oceania_nona['New Caledonia']['AverageTemperature'].to_numpy(copy=True)
reg48 = LinearRegression().fit(x48, y48)
reg48  # display the fitted estimator

In [None]:
# New Caledonia: observed temperatures (faint red points) and the fitted trend (blue line).
fig, ax = plt.subplots()
ax.scatter(x48, y48, color='red', alpha=0.05)
ax.plot(x48, reg48.predict(x48), color='blue')
plt.show()

###New Zealand

In [None]:
# New Zealand: least-squares fit of AverageTemperature on the 'Encode' column (NaN-free rows only).
x49 = slicedata_oceania_nona['New Zealand'][['Encode']].to_numpy(copy=True)  # (n, 1) feature matrix
y49 = slicedata_oceania_nona['New Zealand']['AverageTemperature'].to_numpy(copy=True)
reg49 = LinearRegression().fit(x49, y49)
reg49  # display the fitted estimator

In [None]:
# New Zealand: observed temperatures (faint red points) and the fitted trend (blue line).
fig, ax = plt.subplots()
ax.scatter(x49, y49, color='red', alpha=0.05)
ax.plot(x49, reg49.predict(x49), color='blue')
plt.show()

###Papua New Guinea

In [None]:
# Papua New Guinea: least-squares fit of AverageTemperature on the 'Encode' column (NaN-free rows only).
x50 = slicedata_oceania_nona['Papua New Guinea'][['Encode']].to_numpy(copy=True)  # (n, 1) feature matrix
y50 = slicedata_oceania_nona['Papua New Guinea']['AverageTemperature'].to_numpy(copy=True)
reg50 = LinearRegression().fit(x50, y50)
reg50  # display the fitted estimator

In [None]:
# Papua New Guinea: observed temperatures (faint red points) and the fitted trend (blue line).
fig, ax = plt.subplots()
ax.scatter(x50, y50, color='red', alpha=0.05)
ax.plot(x50, reg50.predict(x50), color='blue')
plt.show()

### **Filling the NaN values of Oceania**

In [None]:
# One fitted regressor per sampled Oceania country.
# NOTE(review): zip() pairs them positionally, so sample_oceania must list the
# countries in the same order as reg46..reg50 — confirm.
reg_list6 = [reg46, reg47, reg48, reg49, reg50]

for country, reg in zip(sample_oceania, reg_list6):
    # Rows whose 'Encode' value does not occur in the NaN-free copy are the ones to impute.
    needs_fill = ~slicedata_oceania[country]['Encode'].isin(slicedata_oceania_nona[country]['Encode'])
    if needs_fill.any():  # predict() raises on an empty feature matrix
        features = slicedata_oceania[country].loc[needs_fill, ['Encode']].to_numpy()
        # A single vectorised predict per country replaces one predict call (and one
        # list rebuild + linear scan) per row; each cell now receives a plain scalar.
        slicedata_oceania[country].loc[needs_fill, 'AverageTemperature'] = reg.predict(features)

In [None]:
# Sanity check: number of still-missing temperatures per country (expected to be 0 once the fill cell has run).
for country in sample_oceania:
    missing_count = slicedata_oceania[country]['AverageTemperature'].isna().sum()
    print(missing_count)

## **Antarctica**


In [None]:
# Show which territories are present in the Antarctica slice.
pprint.PrettyPrinter().pprint(slicedata_antarctica.keys())

In [None]:
# Per-territory copies of the Antarctica slice with every row containing a NaN removed;
# the regressions below are fitted on these.
slicedata_antarctica_nona = {
    country: frame.dropna(how='any')
    for country, frame in slicedata_antarctica.items()
}

###French Southern And Antarctic Lands

In [None]:
# French Southern And Antarctic Lands: least-squares fit of AverageTemperature on 'Encode' (NaN-free rows only).
x51 = slicedata_antarctica_nona['French Southern And Antarctic Lands'][['Encode']].to_numpy(copy=True)  # (n, 1) feature matrix
y51 = slicedata_antarctica_nona['French Southern And Antarctic Lands']['AverageTemperature'].to_numpy(copy=True)
reg51 = LinearRegression().fit(x51, y51)
reg51  # display the fitted estimator

In [None]:
# French Southern And Antarctic Lands: observed temperatures (faint red points) and the fitted trend (blue line).
fig, ax = plt.subplots()
ax.scatter(x51, y51, color='red', alpha=0.05)
ax.plot(x51, reg51.predict(x51), color='blue')
plt.show()

###Heard Island And Mcdonald Islands

In [None]:
# Heard Island And Mcdonald Islands: least-squares fit of AverageTemperature on 'Encode' (NaN-free rows only).
x52 = slicedata_antarctica_nona['Heard Island And Mcdonald Islands'][['Encode']].to_numpy(copy=True)  # (n, 1) feature matrix
y52 = slicedata_antarctica_nona['Heard Island And Mcdonald Islands']['AverageTemperature'].to_numpy(copy=True)
reg52 = LinearRegression().fit(x52, y52)
reg52  # display the fitted estimator

In [None]:
# Heard Island And Mcdonald Islands: observed temperatures (faint red points) and the fitted trend (blue line).
fig, ax = plt.subplots()
ax.scatter(x52, y52, color='red', alpha=0.05)
ax.plot(x52, reg52.predict(x52), color='blue')
plt.show()

### **Filling the NaN values of Antarctica**

In [None]:
# One fitted regressor per Antarctic territory, paired positionally with the names below.
reg_list7 = [reg51, reg52]

for country, reg in zip(['French Southern And Antarctic Lands', 'Heard Island And Mcdonald Islands'], reg_list7):
    # Rows whose 'Encode' value does not occur in the NaN-free copy are the ones to impute.
    needs_fill = ~slicedata_antarctica[country]['Encode'].isin(slicedata_antarctica_nona[country]['Encode'])
    if needs_fill.any():  # predict() raises on an empty feature matrix
        features = slicedata_antarctica[country].loc[needs_fill, ['Encode']].to_numpy()
        # A single vectorised predict per territory replaces one predict call (and one
        # list rebuild + linear scan) per row; each cell now receives a plain scalar.
        slicedata_antarctica[country].loc[needs_fill, 'AverageTemperature'] = reg.predict(features)

In [None]:
# Sanity check: number of still-missing temperatures per territory (expected to be 0 once the fill cell has run).
for country in slicedata_antarctica.keys():
    missing_count = slicedata_antarctica[country]['AverageTemperature'].isna().sum()
    print(missing_count)

# **Comparing samples**


##Data Manipulation

###Dropping the unnecessary columns

In [None]:
for country in sample_asia:
    # errors='ignore' keeps this cell re-runnable: on a second run the uncertainty
    # column is already gone and a strict drop would raise KeyError.
    slicedata_asia[country].drop(columns=['AverageTemperatureUncertainty', 'Year'], inplace=True, errors='ignore')
    # Rebuild 'Year' from the 'dt' timestamps so every row has a value.
    slicedata_asia[country]['Year'] = pd.DatetimeIndex(slicedata_asia[country]['dt']).year

In [None]:
for country in sample_africa:
    # errors='ignore' keeps this cell re-runnable: on a second run the uncertainty
    # column is already gone and a strict drop would raise KeyError.
    slicedata_africa[country].drop(columns=['AverageTemperatureUncertainty', 'Year'], inplace=True, errors='ignore')
    # Rebuild 'Year' from the 'dt' timestamps so every row has a value.
    slicedata_africa[country]['Year'] = pd.DatetimeIndex(slicedata_africa[country]['dt']).year

In [None]:
for country in sample_namerica:
    # errors='ignore' keeps this cell re-runnable: on a second run the uncertainty
    # column is already gone and a strict drop would raise KeyError.
    slicedata_namerica[country].drop(columns=['AverageTemperatureUncertainty', 'Year'], inplace=True, errors='ignore')
    # Rebuild 'Year' from the 'dt' timestamps so every row has a value.
    slicedata_namerica[country]['Year'] = pd.DatetimeIndex(slicedata_namerica[country]['dt']).year

In [None]:
for country in sample_samerica:
    # errors='ignore' keeps this cell re-runnable: on a second run the uncertainty
    # column is already gone and a strict drop would raise KeyError.
    slicedata_samerica[country].drop(columns=['AverageTemperatureUncertainty', 'Year'], inplace=True, errors='ignore')
    # Rebuild 'Year' from the 'dt' timestamps so every row has a value.
    slicedata_samerica[country]['Year'] = pd.DatetimeIndex(slicedata_samerica[country]['dt']).year

In [None]:
for country in sample_europe:
    # errors='ignore' keeps this cell re-runnable: on a second run the uncertainty
    # column is already gone and a strict drop would raise KeyError.
    slicedata_europe[country].drop(columns=['AverageTemperatureUncertainty', 'Year'], inplace=True, errors='ignore')
    # Rebuild 'Year' from the 'dt' timestamps so every row has a value.
    slicedata_europe[country]['Year'] = pd.DatetimeIndex(slicedata_europe[country]['dt']).year

In [None]:
for country in sample_oceania:
    # errors='ignore' keeps this cell re-runnable: on a second run the uncertainty
    # column is already gone and a strict drop would raise KeyError.
    slicedata_oceania[country].drop(columns=['AverageTemperatureUncertainty', 'Year'], inplace=True, errors='ignore')
    # Rebuild 'Year' from the 'dt' timestamps so every row has a value.
    slicedata_oceania[country]['Year'] = pd.DatetimeIndex(slicedata_oceania[country]['dt']).year

In [None]:
for country in ['French Southern And Antarctic Lands', 'Heard Island And Mcdonald Islands']:
    # errors='ignore' keeps this cell re-runnable: on a second run the uncertainty
    # column is already gone and a strict drop would raise KeyError.
    slicedata_antarctica[country].drop(columns=['AverageTemperatureUncertainty', 'Year'], inplace=True, errors='ignore')
    # Rebuild 'Year' from the 'dt' timestamps so every row has a value.
    slicedata_antarctica[country]['Year'] = pd.DatetimeIndex(slicedata_antarctica[country]['dt']).year

###Calculating the yearly median

In [None]:
# Collapse each sampled Asian country's records to one median temperature per year.
sliceaggdata_asia = {}
for country in sample_asia:
    yearly_median = slicedata_asia[country].groupby('Year')['AverageTemperature'].median()
    sliceaggdata_asia[country] = yearly_median.reset_index()  # columns: Year, AverageTemperature

In [None]:
# Collapse each sampled African country's records to one median temperature per year.
sliceaggdata_africa = {}
for country in sample_africa:
    yearly_median = slicedata_africa[country].groupby('Year')['AverageTemperature'].median()
    sliceaggdata_africa[country] = yearly_median.reset_index()  # columns: Year, AverageTemperature

In [None]:
# Collapse each sampled North American country's records to one median temperature per year.
sliceaggdata_namerica = {}
for country in sample_namerica:
    yearly_median = slicedata_namerica[country].groupby('Year')['AverageTemperature'].median()
    sliceaggdata_namerica[country] = yearly_median.reset_index()  # columns: Year, AverageTemperature

In [None]:
# Collapse each sampled European country's records to one median temperature per year.
sliceaggdata_europe = {}
for country in sample_europe:
    yearly_median = slicedata_europe[country].groupby('Year')['AverageTemperature'].median()
    sliceaggdata_europe[country] = yearly_median.reset_index()  # columns: Year, AverageTemperature

In [None]:
# Collapse each sampled South American country's records to one median temperature per year.
sliceaggdata_samerica = {}
for country in sample_samerica:
    yearly_median = slicedata_samerica[country].groupby('Year')['AverageTemperature'].median()
    sliceaggdata_samerica[country] = yearly_median.reset_index()  # columns: Year, AverageTemperature

In [None]:
# Collapse each sampled Oceania country's records to one median temperature per year.
sliceaggdata_oceania = {}
for country in sample_oceania:
    yearly_median = slicedata_oceania[country].groupby('Year')['AverageTemperature'].median()
    sliceaggdata_oceania[country] = yearly_median.reset_index()  # columns: Year, AverageTemperature

In [None]:
# Collapse each Antarctic territory's records to one median temperature per year.
sliceaggdata_antarctica = {}
for country in ['French Southern And Antarctic Lands', 'Heard Island And Mcdonald Islands']:
    yearly_median = slicedata_antarctica[country].groupby('Year')['AverageTemperature'].median()
    sliceaggdata_antarctica[country] = yearly_median.reset_index()  # columns: Year, AverageTemperature

###Plotting The Samples

####Asia

In [None]:
# Show the Asian sample with the default pretty-printer settings.
pprint.PrettyPrinter().pprint(sample_asia)

In [None]:
# Yearly median temperature of every sampled Asian country on one wide figure.
# Each line carries its own label, so the legend is built from what was actually
# plotted instead of relying on sample_asia being in the same order.
asia_line_colors = [
    ('Tajikistan', 'red'),
    ('India', 'blue'),
    ('Vietnam', 'green'),
    ('Lebanon', 'brown'),
    ('Afghanistan', 'orange'),
    ('Oman', 'black'),
    ('Christmas Island', 'deeppink'),
    ('Laos', 'midnightblue'),
    ('Israel', 'violet'),
    ('United Arab Emirates', 'olive'),
]
plt.figure(figsize=[40, 10])
for country, line_color in asia_line_colors:
    plt.plot(sliceaggdata_asia[country]['Year'], sliceaggdata_asia[country]['AverageTemperature'],
             color=line_color, linestyle='-', label=country)
plt.title('Countrywise Plot for Asia')
plt.xlabel('Years')
plt.ylabel('Temperatures')
plt.legend()
plt.show()

####Africa

In [None]:
# Show the African sample with the default pretty-printer settings.
pprint.PrettyPrinter().pprint(sample_africa)

In [None]:
# Yearly median temperature of every sampled African country on one wide figure.
# Each line carries its own label, so the legend is built from what was actually
# plotted instead of relying on sample_africa being in the same order.
africa_line_colors = [
    ('Mozambique', 'red'),
    ('Togo', 'blue'),
    ('Western Sahara', 'green'),
    ('Mauritius', 'brown'),
    ('Tunisia', 'orange'),
    ('Libya', 'black'),
    ('Algeria', 'deeppink'),
    ('Malawi', 'midnightblue'),
    ('Angola', 'violet'),
    ('Senegal', 'olive'),
]
plt.figure(figsize=[40, 10])
for country, line_color in africa_line_colors:
    plt.plot(sliceaggdata_africa[country]['Year'], sliceaggdata_africa[country]['AverageTemperature'],
             color=line_color, linestyle='-', label=country)
plt.title('Countrywise Plot for Africa')
plt.xlabel('Years')
plt.ylabel('Temperatures')
plt.legend()
plt.show()

####North America

In [None]:
# Show the North American sample with the default pretty-printer settings.
pprint.PrettyPrinter().pprint(sample_namerica)

In [None]:
# Yearly median temperature of every sampled North American country on one wide figure.
# Each line carries its own label, so the legend is built from what was actually
# plotted instead of relying on sample_namerica being in the same order.
namerica_line_colors = [
    ('Puerto Rico', 'red'),
    ('United States', 'blue'),
    ('Montserrat', 'green'),
    ('Mexico', 'brown'),
    ('Belize', 'orange'),
    ('Canada', 'black'),
    ('Baker Island', 'deeppink'),
    ('Saint Barthélemy', 'midnightblue'),
    ('Trinidad And Tobago', 'violet'),
    ('El Salvador', 'olive'),
]
plt.figure(figsize=[40, 10])
for country, line_color in namerica_line_colors:
    plt.plot(sliceaggdata_namerica[country]['Year'], sliceaggdata_namerica[country]['AverageTemperature'],
             color=line_color, linestyle='-', label=country)
plt.title('Countrywise Plot for North America', size=20)
plt.xlabel('Years', size=20)
plt.ylabel('Temperatures', size=20)
plt.legend()
plt.show()

####South America

In [None]:
# Show the South American sample with the default pretty-printer settings.
pprint.PrettyPrinter().pprint(sample_samerica)

In [None]:
# Yearly median temperature of every sampled South American country on one wide figure.
# Each line carries its own label, so the legend is built from what was actually
# plotted instead of relying on sample_samerica being in the same order.
samerica_line_colors = [
    ('Bolivia', 'red'),
    ('Falkland Islands (Islas Malvinas)', 'blue'),
    ('Guyana', 'green'),
    ('Paraguay', 'brown'),
    ('Venezuela', 'orange'),
]
plt.figure(figsize=[40, 10])
for country, line_color in samerica_line_colors:
    plt.plot(sliceaggdata_samerica[country]['Year'], sliceaggdata_samerica[country]['AverageTemperature'],
             color=line_color, linestyle='-', label=country)
plt.title('Countrywise Plot for South America', size=20)
plt.xlabel('Years', size=20)
plt.ylabel('Temperatures', size=20)
plt.legend()
plt.show()

####Europe

In [None]:
# Show the European sample with the default pretty-printer settings.
pprint.PrettyPrinter().pprint(sample_europe)

In [None]:
# Yearly median temperature of every sampled European country on one wide figure.
# Each line carries its own label, so the legend is built from what was actually
# plotted instead of relying on sample_europe being in the same order.
europe_line_colors = [
    ('Slovakia', 'red'),
    ('Moldova', 'blue'),
    ('Portugal', 'green'),
    ('Monaco', 'brown'),
    ('Estonia', 'orange'),
    ('Austria', 'black'),
    ('Malta', 'deeppink'),
    ('San Marino', 'midnightblue'),
    ('Luxembourg', 'violet'),
    ('Latvia', 'olive'),
]
plt.figure(figsize=[40, 10])
for country, line_color in europe_line_colors:
    plt.plot(sliceaggdata_europe[country]['Year'], sliceaggdata_europe[country]['AverageTemperature'],
             color=line_color, linestyle='-', label=country)
plt.title('Countrywise Plot for Europe', size=20)
plt.xlabel('Years', size=20)
plt.ylabel('Temperatures', size=20)
plt.legend()
plt.show()

####Oceania

In [None]:
# Show the Oceania sample with the default pretty-printer settings.
pprint.PrettyPrinter().pprint(sample_oceania)

In [None]:
# Yearly median temperature of every sampled Oceania country on one wide figure.
# Each line carries its own label, so the legend is built from what was actually
# plotted instead of relying on sample_oceania being in the same order.
oceania_line_colors = [
    ('Australia', 'red'),
    ('Kingman Reef', 'blue'),
    ('New Caledonia', 'green'),
    ('New Zealand', 'brown'),
    ('Papua New Guinea', 'orange'),
]
plt.figure(figsize=[40, 10])
for country, line_color in oceania_line_colors:
    plt.plot(sliceaggdata_oceania[country]['Year'], sliceaggdata_oceania[country]['AverageTemperature'],
             color=line_color, linestyle='-', label=country)
plt.title('Countrywise Plot for Oceania', size=20)
plt.xlabel('Years', size=20)
plt.ylabel('Temperatures', size=20)
plt.legend()
plt.show()

####Antarctica

In [None]:
# Show which territories are present in the aggregated Antarctica data.
pprint.PrettyPrinter().pprint(sliceaggdata_antarctica.keys())

In [None]:
# Yearly median temperature of both Antarctic territories on one wide figure.
# Each line carries its own label, so the legend is built from what was actually
# plotted instead of relying on the key order of sliceaggdata_antarctica.
antarctica_line_colors = [
    ('French Southern And Antarctic Lands', 'red'),
    ('Heard Island And Mcdonald Islands', 'blue'),
]
plt.figure(figsize=[40, 10])
for country, line_color in antarctica_line_colors:
    plt.plot(sliceaggdata_antarctica[country]['Year'], sliceaggdata_antarctica[country]['AverageTemperature'],
             color=line_color, linestyle='-', label=country)
plt.title('Countrywise Plot for Antarctica', size=20)
plt.xlabel('Years', size=20)
plt.ylabel('Temperatures', size=20)
plt.legend()
plt.show()

# **Trial and Error section**

In [None]:
new_temp=slicedata_asia['India']