In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# This magic line will allow you to generate plots
# within the Jupyter notebook.
%matplotlib inline

#this adds some extra styling to the graphs we will make
sns.set_style("whitegrid")

In [3]:
df=pd.read_csv("GlobalLandTemperatures_GlobalTemperatures.csv")

In [4]:
df.shape


(3192, 9)

In [5]:
df.columns

Index(['dt', 'LandAverageTemperature', 'LandAverageTemperatureUncertainty',
       'LandMaxTemperature', 'LandMaxTemperatureUncertainty',
       'LandMinTemperature', 'LandMinTemperatureUncertainty',
       'LandAndOceanAverageTemperature',
       'LandAndOceanAverageTemperatureUncertainty'],
      dtype='object')

In [6]:
df['dt'] = pd.to_datetime(df['dt'])

In [7]:
df['year'] = df['dt'].dt.year
df['month'] = df['dt'].dt.month
df['day'] = df['dt'].dt.day

In [8]:
df.columns

Index(['dt', 'LandAverageTemperature', 'LandAverageTemperatureUncertainty',
       'LandMaxTemperature', 'LandMaxTemperatureUncertainty',
       'LandMinTemperature', 'LandMinTemperatureUncertainty',
       'LandAndOceanAverageTemperature',
       'LandAndOceanAverageTemperatureUncertainty', 'year', 'month', 'day'],
      dtype='object')

In [9]:
df['year'].head(10)

0    1750
1    1750
2    1750
3    1750
4    1750
5    1750
6    1750
7    1750
8    1750
9    1750
Name: year, dtype: int64

In [10]:

df.drop(df[df['year']<1901].index, inplace=True)


In [11]:
df.shape

(1380, 12)

In [12]:
df['dt'].isna().sum()

0

In [13]:
avg_yearly=df[['LandAverageTemperature','year']]\
.groupby(['year'],as_index=False).mean()

In [23]:
avg_yearly.columns=['year','AverageYearlyTemp']

In [15]:
avg_yearly

Unnamed: 0,year,AverageYearlyTemp
0,1901,8.541917
1,1902,8.304417
2,1903,8.220167
3,1904,8.090917
4,1905,8.225167
...,...,...
110,2011,9.516000
111,2012,9.507333
112,2013,9.606500
113,2014,9.570667


In [16]:
df=pd.merge(df,avg_yearly,on='year')

In [17]:
df

Unnamed: 0,dt,LandAverageTemperature,LandAverageTemperatureUncertainty,LandMaxTemperature,LandMaxTemperatureUncertainty,LandMinTemperature,LandMinTemperatureUncertainty,LandAndOceanAverageTemperature,LandAndOceanAverageTemperatureUncertainty,year,month,day,AverageYearlyTemp
0,1901-01-01,2.191,0.291,7.996,0.604,-3.491,0.490,13.290,0.141,1901,1,1,8.541917
1,1901-01-02,3.092,0.418,9.038,0.371,-2.678,0.539,13.660,0.163,1901,1,2,8.541917
2,1901-01-03,5.671,0.251,11.476,0.370,-0.644,0.395,14.383,0.138,1901,1,3,8.541917
3,1901-01-04,8.522,0.326,14.733,0.368,2.264,0.376,15.220,0.151,1901,1,4,8.541917
4,1901-01-05,11.178,0.339,17.315,0.397,4.855,0.390,15.936,0.154,1901,1,5,8.541917
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1375,2015-01-08,14.755,0.072,20.699,0.110,9.005,0.170,17.589,0.057,2015,1,8,9.831000
1376,2015-01-09,12.999,0.079,18.845,0.088,7.199,0.229,17.049,0.058,2015,1,9,9.831000
1377,2015-01-10,10.801,0.102,16.450,0.059,5.232,0.115,16.290,0.062,2015,1,10,9.831000
1378,2015-01-11,7.433,0.119,12.892,0.093,2.157,0.106,15.252,0.063,2015,1,11,9.831000


In [18]:
df.to_csv('Global_Temp.csv', sep=',')

In [19]:
avg_yearly.to_csv('GlobalYearlyLand_Temp.csv', sep=',')

In [20]:
df[['year','LandAndOceanAverageTemperature','LandAverageTemperature']]

Unnamed: 0,year,LandAndOceanAverageTemperature,LandAverageTemperature
0,1901,13.290,2.191
1,1901,13.660,3.092
2,1901,14.383,5.671
3,1901,15.220,8.522
4,1901,15.936,11.178
...,...,...,...
1375,2015,17.589,14.755
1376,2015,17.049,12.999
1377,2015,16.290,10.801
1378,2015,15.252,7.433


In [21]:
avg_yearly_landOcean=df[['LandAndOceanAverageTemperature','year']]\
.groupby(['year'],as_index=False).mean()

In [24]:
avg_yearly_landOcean.columns=['year','AverageYearlyLandOceanTemp']

In [25]:
avg_yearly_landOcean

Unnamed: 0,year,AverageYearlyLandOceanTemp
0,1901,15.073333
1,1902,14.958333
2,1903,14.836583
3,1904,14.810417
4,1905,14.954667
...,...,...
110,2011,15.769500
111,2012,15.802333
112,2013,15.854417
113,2014,15.913000


In [26]:
final=pd.merge(avg_yearly,avg_yearly_landOcean,on='year')

In [27]:
final

Unnamed: 0,year,AverageYearlyTemp,AverageYearlyLandOceanTemp
0,1901,8.541917,15.073333
1,1902,8.304417,14.958333
2,1903,8.220167,14.836583
3,1904,8.090917,14.810417
4,1905,8.225167,14.954667
...,...,...,...
110,2011,9.516000,15.769500
111,2012,9.507333,15.802333
112,2013,9.606500,15.854417
113,2014,9.570667,15.913000


In [28]:
final.to_csv('Global_Land_LandOcean_Temp.csv', sep=',')