# Temperature Population Sea Level and Emissions
### Notebook to combine .csv datasets into one sqlite database for Project 2

In [20]:
# Dependencies and Setup
import pandas as pd
import numpy as np
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

# File to Load  
temperature_data_to_load = "data/temp_data.csv"
population_data_to_load = "data/population_data.csv"
sealevel_data_to_load = "data/sealevel_data.csv"
co2_data_to_load = "data/CO2_data.csv"

# Read Data  
temperature_data = pd.read_csv(temperature_data_to_load)
population_data = pd.read_csv(population_data_to_load)
sealevel_data = pd.read_csv(sealevel_data_to_load)
co2_data = pd.read_csv(co2_data_to_load)


In [21]:
#Temp Data Molly C.
temperature_data.head(5)

Unnamed: 0,Year,LandAndOceanAverageTemperature,LandAndOceanAverageTemperatureUncertainty
0,1993,15.466417,0.059167
1,1994,15.535,0.058583
2,1995,15.637833,0.060417
3,1996,15.524667,0.0595
4,1997,15.713833,0.059167


In [22]:
#Population Data John B. 
population_data.head(5)

Unnamed: 0.1,Unnamed: 0,Year,population_millions
0,0,1993,5581597.598
1,1,1994,5663150.428
2,2,1995,5744212.93
3,3,1996,5824891.931
4,4,1997,5905045.647


In [23]:
#Sealevel Data Melissa W. 
sealevel_data.head(5)

Unnamed: 0,Year,GMSL
0,1993,90.471622
1,1994,89.516216
2,1995,87.334722
3,1996,85.473243
4,1997,99.442432


In [24]:
#C02 Data - Kat A. 
co2_data.head(5)

Unnamed: 0,Year,CO2_emissions_tons_per_capita
0,1993,869.636645
1,1994,863.100924
2,1995,858.225479
3,1996,859.062572
4,1997,867.379574


In [25]:
# Use Glob to concat
import matplotlib.pyplot as plt
from glob import glob


In [26]:
# Combine Files 
climate_files = sorted(glob('data/*_data.csv'))

In [27]:
# Review Data 
climate_files

['data\\CO2_data.csv',
 'data\\population_data.csv',
 'data\\sealevel_data.csv',
 'data\\temp_data.csv']

In [28]:
# Concat Data in Files 
climate_data = pd.concat((pd.read_csv(file).assign(filename = file)
          for file in climate_files), sort= True,  axis=1) 

climate_data

Unnamed: 0.1,Year,CO2_emissions_tons_per_capita,filename,Unnamed: 0,Year.1,population_millions,filename.1,Year.2,GMSL,filename.2,Year.3,LandAndOceanAverageTemperature,LandAndOceanAverageTemperatureUncertainty,filename.3
0,1993,869.636645,data\CO2_data.csv,0,1993,5581597.598,data\population_data.csv,1993,90.471622,data\sealevel_data.csv,1993,15.466417,0.059167,data\temp_data.csv
1,1994,863.100924,data\CO2_data.csv,1,1994,5663150.428,data\population_data.csv,1994,89.516216,data\sealevel_data.csv,1994,15.535,0.058583,data\temp_data.csv
2,1995,858.225479,data\CO2_data.csv,2,1995,5744212.93,data\population_data.csv,1995,87.334722,data\sealevel_data.csv,1995,15.637833,0.060417,data\temp_data.csv
3,1996,859.062572,data\CO2_data.csv,3,1996,5824891.931,data\population_data.csv,1996,85.473243,data\sealevel_data.csv,1996,15.524667,0.0595,data\temp_data.csv
4,1997,867.379574,data\CO2_data.csv,4,1997,5905045.647,data\population_data.csv,1997,99.442432,data\sealevel_data.csv,1997,15.713833,0.059167,data\temp_data.csv
5,1998,866.162913,data\CO2_data.csv,5,1998,5984794.075,data\population_data.csv,1998,96.977297,data\sealevel_data.csv,1998,15.826,0.063,data\temp_data.csv
6,1999,854.147358,data\CO2_data.csv,6,1999,6064239.033,data\population_data.csv,1999,87.769459,data\sealevel_data.csv,1999,15.600333,0.063333,data\temp_data.csv
7,2000,878.764146,data\CO2_data.csv,7,2000,6143493.806,data\population_data.csv,2000,86.657297,data\sealevel_data.csv,2000,15.610667,0.0635,data\temp_data.csv
8,2001,888.096012,data\CO2_data.csv,8,2001,6222626.531,data\population_data.csv,2001,84.688378,data\sealevel_data.csv,2001,15.7675,0.064583,data\temp_data.csv
9,2002,884.272209,data\CO2_data.csv,9,2002,6301773.172,data\population_data.csv,2002,85.895278,data\sealevel_data.csv,2002,15.829167,0.062917,data\temp_data.csv


In [37]:
#Selecting the columns we want 
climate_combined = climate_data[['Year', 
                            'CO2_emissions_tons_per_capita',
                            'population_millions', 
                            'GMSL', 
                            'LandAndOceanAverageTemperature',
                            'LandAndOceanAverageTemperatureUncertainty']]
climate_combined

Unnamed: 0,Year,Year.1,Year.2,Year.3,CO2_emissions_tons_per_capita,population_millions,GMSL,LandAndOceanAverageTemperature,LandAndOceanAverageTemperatureUncertainty
0,1993,1993,1993,1993,869.636645,5581597.598,90.471622,15.466417,0.059167
1,1994,1994,1994,1994,863.100924,5663150.428,89.516216,15.535,0.058583
2,1995,1995,1995,1995,858.225479,5744212.93,87.334722,15.637833,0.060417
3,1996,1996,1996,1996,859.062572,5824891.931,85.473243,15.524667,0.0595
4,1997,1997,1997,1997,867.379574,5905045.647,99.442432,15.713833,0.059167
5,1998,1998,1998,1998,866.162913,5984794.075,96.977297,15.826,0.063
6,1999,1999,1999,1999,854.147358,6064239.033,87.769459,15.600333,0.063333
7,2000,2000,2000,2000,878.764146,6143493.806,86.657297,15.610667,0.0635
8,2001,2001,2001,2001,888.096012,6222626.531,84.688378,15.7675,0.064583
9,2002,2002,2002,2002,884.272209,6301773.172,85.895278,15.829167,0.062917


In [38]:
#Re-Check the columns 
climate_combined.columns

Index(['Year', 'Year', 'Year', 'Year', 'CO2_emissions_tons_per_capita', 'population_millions', 'GMSL', 'LandAndOceanAverageTemperature', 'LandAndOceanAverageTemperatureUncertainty'], dtype='object')

ValueError: The truth value of a DataFrame is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [31]:
new_climate_data = climate_combined.drop_duplicates(subset ="Year", 
                     keep = False, inplace = True) 

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [33]:
# displaying data 
new_climate_data.columns

AttributeError: 'NoneType' object has no attribute 'columns'

In [None]:
climate_combined.drop(climate_combined.columns[[2,3]], axis = 1)

In [None]:
climate_climate_data = climate_combined

climate_climate_data.rename(columns={'Year':'year',  
                              'Avg Temp':'LandAndOceanAverageTemperature',
                             'Emissions':'CO2_emissions_tons_per_capita',
                             'GMSL':'Avg GMSL',
                             'Avg Population in mil':'population_millions'} )
climate_data

In [None]:
#Save it all into one file
climate_combined.to_csv('data\combinedclimatedata.csv')
