In [70]:
# Import the required libraries and dependencies
import pandas as pd
import hvplot.pandas
from pathlib import Path

# Climate Change Analysis

### Import Data

In [71]:
# Load the three CSV files from the Resources folder into DataFrames:
# GlobalLandTemperaturesByCountry.csv, GlobalLandTemperaturesByMajorCity.csv
# and GlobalTemperatures.csv. Paths are built with pathlib.Path and read with
# pandas' read_csv.
temperature_by_country_df = pd.read_csv(
    filepath_or_buffer=Path("./Resources/GlobalLandTemperaturesByCountry.csv")
)
temperature_by_major_city_df = pd.read_csv(
    filepath_or_buffer=Path("./Resources/GlobalLandTemperaturesByMajorCity.csv")
)
global_temperature_df = pd.read_csv(
    filepath_or_buffer=Path("./Resources/GlobalTemperatures.csv")
)


In [72]:
# Preview the top five and bottom five rows of temperature_by_country_df
display(
    temperature_by_country_df.head(5),
    temperature_by_country_df.tail(5),
)

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country
0,1743-11-01,4.384,2.294,Åland
1,1743-12-01,,,Åland
2,1744-01-01,,,Åland
3,1744-02-01,,,Åland
4,1744-03-01,,,Åland


Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country
577457,2013-05-01,19.059,1.022,Zimbabwe
577458,2013-06-01,17.613,0.473,Zimbabwe
577459,2013-07-01,17.0,0.453,Zimbabwe
577460,2013-08-01,19.759,0.717,Zimbabwe
577461,2013-09-01,,,Zimbabwe


In [73]:
# Preview the top five and bottom five rows of temperature_by_major_city_df
display(
    temperature_by_major_city_df.head(5),
    temperature_by_major_city_df.tail(5),
)

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude
0,1849-01-01,26.704,1.435,Abidjan,Côte D'Ivoire,5.63N,3.23W
1,1849-02-01,27.434,1.362,Abidjan,Côte D'Ivoire,5.63N,3.23W
2,1849-03-01,28.101,1.612,Abidjan,Côte D'Ivoire,5.63N,3.23W
3,1849-04-01,26.14,1.387,Abidjan,Côte D'Ivoire,5.63N,3.23W
4,1849-05-01,25.427,1.2,Abidjan,Côte D'Ivoire,5.63N,3.23W


Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude
239172,2013-05-01,18.979,0.807,Xian,China,34.56N,108.97E
239173,2013-06-01,23.522,0.647,Xian,China,34.56N,108.97E
239174,2013-07-01,25.251,1.042,Xian,China,34.56N,108.97E
239175,2013-08-01,24.528,0.84,Xian,China,34.56N,108.97E
239176,2013-09-01,,,Xian,China,34.56N,108.97E


In [74]:
# Preview the top five and bottom five rows of global_temperature_df
display(
    global_temperature_df.head(5),
    global_temperature_df.tail(5),
)

Unnamed: 0,dt,LandAverageTemperature,LandAverageTemperatureUncertainty,LandMaxTemperature,LandMaxTemperatureUncertainty,LandMinTemperature,LandMinTemperatureUncertainty,LandAndOceanAverageTemperature,LandAndOceanAverageTemperatureUncertainty
0,1750-01-01,3.034,3.574,,,,,,
1,1750-02-01,3.083,3.702,,,,,,
2,1750-03-01,5.626,3.076,,,,,,
3,1750-04-01,8.49,2.451,,,,,,
4,1750-05-01,11.573,2.072,,,,,,


Unnamed: 0,dt,LandAverageTemperature,LandAverageTemperatureUncertainty,LandMaxTemperature,LandMaxTemperatureUncertainty,LandMinTemperature,LandMinTemperatureUncertainty,LandAndOceanAverageTemperature,LandAndOceanAverageTemperatureUncertainty
3187,2015-08-01,14.755,0.072,20.699,0.11,9.005,0.17,17.589,0.057
3188,2015-09-01,12.999,0.079,18.845,0.088,7.199,0.229,17.049,0.058
3189,2015-10-01,10.801,0.102,16.45,0.059,5.232,0.115,16.29,0.062
3190,2015-11-01,7.433,0.119,12.892,0.093,2.157,0.106,15.252,0.063
3191,2015-12-01,5.518,0.1,10.725,0.154,0.287,0.099,14.774,0.062


### Clean Data

In [75]:
# Parse the 'dt' column into datetime values and make it the index so the
# data can be handled as a time series.
# The `infer_datetime_format` argument is intentionally not passed: it is
# deprecated since pandas 2.0 (format inference is now the default) and only
# triggers a warning. The parsed values are the same without it.
global_temperature_df = (
    global_temperature_df
    .assign(dt=pd.to_datetime(global_temperature_df['dt']))
    .set_index('dt')
)

In [76]:
# Keep only the two average-temperature columns used in this analysis
global_temperature_df = global_temperature_df.loc[
    :, ['LandAverageTemperature', 'LandAndOceanAverageTemperature']
]

# Split the two columns into separate Series and drop the missing values of
# each one independently, since LandAndOceanAverageTemperature has fewer values
land_global_temperature_df = global_temperature_df.loc[:, 'LandAverageTemperature'].dropna()
all_global_temperature_df = global_temperature_df.loc[:, 'LandAndOceanAverageTemperature'].dropna()

# Show how many observations remain in each of the two Series
display(
    len(land_global_temperature_df),
    len(all_global_temperature_df),
)

3180

1992

In [77]:
# Preview the first five rows of each Series
display(
    land_global_temperature_df.head(5),
    all_global_temperature_df.head(5),
)

dt
1750-01-01     3.034
1750-02-01     3.083
1750-03-01     5.626
1750-04-01     8.490
1750-05-01    11.573
Name: LandAverageTemperature, dtype: float64

dt
1850-01-01    12.833
1850-02-01    13.588
1850-03-01    14.043
1850-04-01    14.667
1850-05-01    15.507
Name: LandAndOceanAverageTemperature, dtype: float64

In [81]:
# Plot every monthly land-average temperature value over time. The title and
# axis labels are set explicitly so the figure can be understood on its own.
land_global_temperature_df.hvplot(
    title="Global Land Average Temperature (Monthly)",
    xlabel="Date",
    ylabel="Land Average Temperature",
)

As the following screenshot shows, the seasonal variation within each year is larger than the long-term change in land temperature, which makes the overall trend hard to see. For this reason, we will compute the mean temperature for each year and use those yearly means to evaluate overall trends.

![A screenshot depicting the impact of seasons on overall trend analysis.](./Images/land_average_temp_no_average.PNG)

# Average the monthly land temperatures within each calendar year. Grouping by
# the year of the datetime index removes the seasonal cycle so that the
# long-term trend becomes visible.
land_yearly_mean_temperature = (
    land_global_temperature_df
    .groupby(land_global_temperature_df.index.year)
    .mean()
    .rename_axis('year')
)

# Plot the yearly means
land_yearly_mean_temperature.hvplot(
    title="Global Land Average Temperature (Yearly Mean)",
    xlabel="Year",
    ylabel="Land Average Temperature",
)