In [1]:
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

In [3]:
# Read the raw per-state land temperature records.
# The path is relative, so it is resolved against the current working directory.
file_path = "../Resources/GlobalLandTemperaturesByState.csv"
global_temp_df = pd.read_csv(filepath_or_buffer=file_path)
# Preview the first five rows to check the columns that were loaded.
global_temp_df.head(n=5)

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,State,Country
0,1855-05-01,25.544,1.171,Acre,Brazil
1,1855-06-01,24.228,1.103,Acre,Brazil
2,1855-07-01,24.371,1.044,Acre,Brazil
3,1855-08-01,25.427,1.073,Acre,Brazil
4,1855-09-01,25.675,1.014,Acre,Brazil


In [4]:
# Helper that casts the 'dt' column to datetime
def dt(df_):
    """Return the 'dt' column of ``df_`` parsed by ``pd.to_datetime``.

    The frame passed in is left untouched; the parsed Series is only returned,
    so the caller has to store it (e.g. via ``df.assign(dt=dt)``) for the cast
    to take effect.
    """
    return pd.to_datetime(df_['dt'])

In [5]:
# Keep only the observations dated 1930-01-01 or later (no country filter is applied).
# 'dt' holds ISO-formatted (YYYY-MM-DD) strings, so comparing them as text
# orders them chronologically.
global_temp_df = global_temp_df[
    global_temp_df['dt'] >= '1930-01-01'
]
global_temp_df


Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,State,Country
896,1930-01-01,25.583,0.544,Acre,Brazil
897,1930-02-01,25.887,0.725,Acre,Brazil
898,1930-03-01,25.871,0.775,Acre,Brazil
899,1930-04-01,25.414,0.367,Acre,Brazil
900,1930-05-01,25.066,0.467,Acre,Brazil
...,...,...,...,...,...
645670,2013-05-01,21.634,0.578,Zhejiang,China
645671,2013-06-01,24.679,0.596,Zhejiang,China
645672,2013-07-01,29.272,1.340,Zhejiang,China
645673,2013-08-01,29.202,0.869,Zhejiang,China


In [6]:
# Discard every row that has a missing value in any column
cleanglobal_temp_df = global_temp_df.dropna(axis="index", how="any")
# Preview the first five remaining rows
cleanglobal_temp_df.head(n=5)

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,State,Country
896,1930-01-01,25.583,0.544,Acre,Brazil
897,1930-02-01,25.887,0.725,Acre,Brazil
898,1930-03-01,25.871,0.775,Acre,Brazil
899,1930-04-01,25.414,0.367,Acre,Brazil
900,1930-05-01,25.066,0.467,Acre,Brazil


In [7]:
# Keep an independent snapshot of the cleaned data for future graphs
# NOTE(review): the name `copy` matches the stdlib `copy` module — rename if that module is ever imported here.
copy = cleanglobal_temp_df.copy(deep=True)
# Count the distinct values in each column
cleanglobal_temp_df.nunique(axis=0)


dt                                1005
AverageTemperature               71290
AverageTemperatureUncertainty     1951
State                              241
Country                              7
dtype: int64

In [8]:
# Inspect the dtype pandas assigned to each column
cleanglobal_temp_df.dtypes

dt                                object
AverageTemperature               float64
AverageTemperatureUncertainty    float64
State                             object
Country                           object
dtype: object

In [9]:
# Count the non-null entries per column; after dropna every column should
# report the same number.
cleanglobal_temp_df.notnull().sum(axis="index")


dt                               242024
AverageTemperature               242024
AverageTemperatureUncertainty    242024
State                            242024
Country                          242024
dtype: int64

In [10]:
# Give the columns shorter labels ("State" and "Country" keep their names)
cleanglobal_temp_df = cleanglobal_temp_df.rename(
    {
        "dt": "Date",
        "AverageTemperature": "Avg_temp",
        "AverageTemperatureUncertainty": "Avg_temp_Uncer",
        "State": "State",
        "Country": "Country",
    },
    axis="columns",
)
cleanglobal_temp_df


Unnamed: 0,Date,Avg_temp,Avg_temp_Uncer,State,Country
896,1930-01-01,25.583,0.544,Acre,Brazil
897,1930-02-01,25.887,0.725,Acre,Brazil
898,1930-03-01,25.871,0.775,Acre,Brazil
899,1930-04-01,25.414,0.367,Acre,Brazil
900,1930-05-01,25.066,0.467,Acre,Brazil
...,...,...,...,...,...
645669,2013-04-01,15.710,0.461,Zhejiang,China
645670,2013-05-01,21.634,0.578,Zhejiang,China
645671,2013-06-01,24.679,0.596,Zhejiang,China
645672,2013-07-01,29.272,1.340,Zhejiang,China


In [11]:
# Round the temperature columns to two decimal places.
# DataFrame.round keeps both columns as floats, so they remain usable for
# numeric analysis and are exported as numbers; formatting them with
# "{:.2f}".format would turn every value into a string instead.
cleanglobal_temp_df = cleanglobal_temp_df.round({"Avg_temp": 2, "Avg_temp_Uncer": 2})
cleanglobal_temp_df

Unnamed: 0,Date,Avg_temp,Avg_temp_Uncer,State,Country
896,1930-01-01,25.58,0.54,Acre,Brazil
897,1930-02-01,25.89,0.72,Acre,Brazil
898,1930-03-01,25.87,0.78,Acre,Brazil
899,1930-04-01,25.41,0.37,Acre,Brazil
900,1930-05-01,25.07,0.47,Acre,Brazil
...,...,...,...,...,...
645669,2013-04-01,15.71,0.46,Zhejiang,China
645670,2013-05-01,21.63,0.58,Zhejiang,China
645671,2013-06-01,24.68,0.60,Zhejiang,China
645672,2013-07-01,29.27,1.34,Zhejiang,China


In [12]:
# Save the cleaned data as a CSV file.
# NOTE(review): the raw file is read from "../Resources" but this writes to
# "Resources" relative to the working directory — confirm the intended location.
filepath = Path('Resources/cleanglobal_temp.csv')
# Create the target folder first so the write cannot fail on a missing directory
filepath.parent.mkdir(parents=True, exist_ok=True)
# A single write is enough: calling to_csv without a path would only build the
# whole CSV as an in-memory string and throw it away.
cleanglobal_temp_df.to_csv(filepath, index=True)


In [13]:
from sqlalchemy import create_engine

In [14]:
from config import db_password

In [15]:
# Connection URL for the local PostgreSQL database 'climate_change_db' (user 'postgres').
# NOTE(review): the password is interpolated as-is; if it can contain characters
# such as '@', '/' or ':' it presumably needs URL-encoding first — confirm against config.
db_string = f"postgresql://postgres:{db_password}@127.0.0.1:5432/climate_change_db"

In [16]:
# Build the SQLAlchemy engine from the connection URL
engine = create_engine(db_string)

In [20]:
# Write the cleaned data to the 'cleanglobal_temp' table.
# if_exists='replace' drops and recreates the table on every run, so re-running
# the notebook no longer fails once the table already exists (the default,
# if_exists='fail', raises a ValueError in that case).
cleanglobal_temp_df.to_sql(name='cleanglobal_temp', con=engine, if_exists='replace')