In [24]:
#importing required libraries

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

from datetime import date, datetime

In [2]:
#feeding in csv path with global land temp data from all countries measured monthly since 1743.
csv_path = "GlobalLandTemperaturesByCountry.csv"
country_temps = pd.read_csv(csv_path)
country_temps.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country
0,1743-11-01,4.384,2.294,Åland
1,1743-12-01,,,Åland
2,1744-01-01,,,Åland
3,1744-02-01,,,Åland
4,1744-03-01,,,Åland


In [3]:
#use count to check for missing entries
country_temps.count()

dt                               577462
AverageTemperature               544811
AverageTemperatureUncertainty    545550
Country                          577462
dtype: int64

In [4]:
#check data types
country_temps.dtypes

dt                                object
AverageTemperature               float64
AverageTemperatureUncertainty    float64
Country                           object
dtype: object

In [5]:
#find column names
country_temps = pd.DataFrame(country_temps) 
country_temps.columns

Index(['dt', 'AverageTemperature', 'AverageTemperatureUncertainty', 'Country'], dtype='object')

In [6]:
#renaming columns
country_temps = country_temps.rename(columns={"dt": "Reading Date", "AverageTemperature":"Average Temperature, Celsius", "AverageTemperatureUncertainty": "Average Temperature Uncertainty", "Country": "Country"})

country_temps.head(10)

Unnamed: 0,Reading Date,"Average Temperature, Celsius",Average Temperature Uncertainty,Country
0,1743-11-01,4.384,2.294,Åland
1,1743-12-01,,,Åland
2,1744-01-01,,,Åland
3,1744-02-01,,,Åland
4,1744-03-01,,,Åland
5,1744-04-01,1.53,4.68,Åland
6,1744-05-01,6.702,1.789,Åland
7,1744-06-01,11.609,1.577,Åland
8,1744-07-01,15.342,1.41,Åland
9,1744-08-01,,,Åland


In [7]:
#Removes lines without temp. readings

dropna_country_temps = country_temps.dropna(subset=['Average Temperature, Celsius'])
dropna_country_temps.head(10)

Unnamed: 0,Reading Date,"Average Temperature, Celsius",Average Temperature Uncertainty,Country
0,1743-11-01,4.384,2.294,Åland
5,1744-04-01,1.53,4.68,Åland
6,1744-05-01,6.702,1.789,Åland
7,1744-06-01,11.609,1.577,Åland
8,1744-07-01,15.342,1.41,Åland
10,1744-09-01,11.702,1.517,Åland
11,1744-10-01,5.477,1.862,Åland
12,1744-11-01,3.407,1.425,Åland
13,1744-12-01,-2.181,1.641,Åland
14,1745-01-01,-3.85,1.841,Åland


In [8]:
#all the columns have the same amount of data!

dropna_country_temps.count()

Reading Date                       544811
Average Temperature, Celsius       544811
Average Temperature Uncertainty    544811
Country                            544811
dtype: int64

In [9]:
#listing out names of countries w temp recordings
dropna_country_temps.Country.unique()

array(['Åland', 'Afghanistan', 'Africa', 'Albania', 'Algeria',
       'American Samoa', 'Andorra', 'Angola', 'Anguilla',
       'Antigua And Barbuda', 'Argentina', 'Armenia', 'Aruba', 'Asia',
       'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain',
       'Baker Island', 'Bangladesh', 'Barbados', 'Belarus', 'Belgium',
       'Belize', 'Benin', 'Bhutan', 'Bolivia',
       'Bonaire, Saint Eustatius And Saba', 'Bosnia And Herzegovina',
       'Botswana', 'Brazil', 'British Virgin Islands', 'Bulgaria',
       'Burkina Faso', 'Burma', 'Burundi', "Côte D'Ivoire", 'Cambodia',
       'Cameroon', 'Canada', 'Cape Verde', 'Cayman Islands',
       'Central African Republic', 'Chad', 'Chile', 'China',
       'Christmas Island', 'Colombia', 'Comoros',
       'Congo (Democratic Republic Of The)', 'Congo', 'Costa Rica',
       'Croatia', 'Cuba', 'Curaçao', 'Cyprus', 'Czech Republic',
       'Denmark (Europe)', 'Denmark', 'Djibouti', 'Dominica',
       'Dominican Republic', 'Ecuador', 'Egypt'

In [10]:
#indexing data by country name
df = dropna_country_temps.set_index("Country")
df

Unnamed: 0_level_0,Reading Date,"Average Temperature, Celsius",Average Temperature Uncertainty
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Åland,1743-11-01,4.384,2.294
Åland,1744-04-01,1.530,4.680
Åland,1744-05-01,6.702,1.789
Åland,1744-06-01,11.609,1.577
Åland,1744-07-01,15.342,1.410
...,...,...,...
Zimbabwe,2013-04-01,21.142,0.495
Zimbabwe,2013-05-01,19.059,1.022
Zimbabwe,2013-06-01,17.613,0.473
Zimbabwe,2013-07-01,17.000,0.453


In [11]:
#removes dashes from dates and converts them to int
dasheddates = df["Reading Date"]

df["Reading Date"] = df["Reading Date"].replace('-', '', regex=True)


df["Reading Date"] = df["Reading Date"].astype(int)

df.dtypes

Reading Date                         int64
Average Temperature, Celsius       float64
Average Temperature Uncertainty    float64
dtype: object

In [12]:
#isolates data from 20 cacao bean growing nations that produced beans for the best rated chocolate bars, as determined in the previous jupyter notebook
df = df.loc[['Belize', 'Bolivia', 'Brazil', 'Cameroon', 'Carribean', 'Colombia', 'Congo', 'Costa Rica', 'Cuba', 'Dominican Repoublic', 'Ecuador', 'Fiji', 'Ghana', 'Grenada', 'Guatemala', 'Hawaii', 'Honduras', 'India', 'Indonesia', 'Jamaica'], ["Reading Date", "Average Temperature, Celsius"]]

df

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike
  return self._getitem_tuple(key)


Unnamed: 0_level_0,Reading Date,"Average Temperature, Celsius"
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
Belize,18250101.0,22.842
Belize,18250201.0,23.351
Belize,18250301.0,24.702
Belize,18250401.0,26.901
Belize,18250501.0,27.041
...,...,...
Jamaica,20130501.0,27.603
Jamaica,20130601.0,28.075
Jamaica,20130701.0,28.206
Jamaica,20130801.0,28.087


In [13]:
#finds all readings for our top countries after 2006 (which is the year of the first chocolate bar review in our other data set, our last review was written in 2017. Unfortunately, our weather data only goes to 2013 so we are effectively interpreting climate data from Jan 2006 to Sep 2013.)

daterange = df.loc[df["Reading Date"] > 20060000]
daterange

Unnamed: 0_level_0,Reading Date,"Average Temperature, Celsius"
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
Belize,20060101.0,22.969
Belize,20060201.0,23.225
Belize,20060301.0,25.459
Belize,20060401.0,26.855
Belize,20060501.0,27.501
...,...,...
Jamaica,20130501.0,27.603
Jamaica,20130601.0,28.075
Jamaica,20130701.0,28.206
Jamaica,20130801.0,28.087


In [14]:
#converting the reading dates into datetime type

pd.to_datetime(daterange["Reading Date"], format='%Y%m%d', errors='coerce')

daterange.head()

Unnamed: 0_level_0,Reading Date,"Average Temperature, Celsius"
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
Belize,20060101.0,22.969
Belize,20060201.0,23.225
Belize,20060301.0,25.459
Belize,20060401.0,26.855
Belize,20060501.0,27.501


In [15]:
daterange.dtypes

Reading Date                    float64
Average Temperature, Celsius    float64
dtype: object

In [16]:
daterange.groupby("Reading Date")
daterange

Unnamed: 0_level_0,Reading Date,"Average Temperature, Celsius"
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
Belize,20060101.0,22.969
Belize,20060201.0,23.225
Belize,20060301.0,25.459
Belize,20060401.0,26.855
Belize,20060501.0,27.501
...,...,...
Jamaica,20130501.0,27.603
Jamaica,20130601.0,28.075
Jamaica,20130701.0,28.206
Jamaica,20130801.0,28.087


In [17]:
#converts reading date to datetime format and separates out into years and months.
#renamed months to names and cleaned look of table

daterange['DateTime'] = daterange['Reading Date'].apply(lambda x: pd.to_datetime(str(x), format='%Y%m%d'))
daterange['Year'] = daterange['DateTime'].dt.year
daterange['Month'] = daterange['DateTime'].dt.month


monthnames = {1:'Jan', 2:'Feb', 3:'Mar', 4:'Apr', 5:'May', 6:'Jun', 7:'Jul', 8:'Aug', 9:'Sep', 10:'Oct', 11:'Nov', 12:'Dec'}
daterange = daterange.replace({"Month":monthnames})

del daterange["Reading Date"]

daterange.dtypes

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Average Temperature, Celsius           float64
DateTime                        datetime64[ns]
Year                                     int64
Month                                   object
dtype: object

In [18]:
#pivot table summarizes country temp. information by month, year

yearlyclimatedata =pd.pivot_table(daterange,values=['Average Temperature, Celsius'],index=['Year'],columns=['Country'])
yearlyclimatedata

Unnamed: 0_level_0,"Average Temperature, Celsius","Average Temperature, Celsius","Average Temperature, Celsius","Average Temperature, Celsius","Average Temperature, Celsius","Average Temperature, Celsius","Average Temperature, Celsius","Average Temperature, Celsius","Average Temperature, Celsius","Average Temperature, Celsius","Average Temperature, Celsius","Average Temperature, Celsius","Average Temperature, Celsius","Average Temperature, Celsius","Average Temperature, Celsius","Average Temperature, Celsius","Average Temperature, Celsius"
Country,Belize,Bolivia,Brazil,Cameroon,Colombia,Congo,Costa Rica,Cuba,Ecuador,Fiji,Ghana,Grenada,Guatemala,Honduras,India,Indonesia,Jamaica
Year,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2
2006,25.7085,21.47,25.517,25.293417,25.476583,25.282667,26.514917,26.1545,22.603167,25.6315,27.581583,27.743667,23.80825,25.589917,24.732083,26.38175,27.034167
2007,25.85875,21.129333,25.668,25.209833,25.494,25.22975,26.382917,26.27975,22.35975,26.055333,27.627667,27.591,23.8185,25.383417,24.649917,26.293083,27.013917
2008,25.524667,21.189,25.410333,25.121083,25.153917,25.292417,26.056083,26.05475,22.22425,25.730167,27.367833,27.363667,23.482083,25.107833,24.406,26.06525,26.6215
2009,25.90775,21.378333,25.600583,25.407333,25.618333,25.34925,26.522917,26.022333,22.677167,25.227417,27.557583,27.62675,23.939917,25.518583,25.146667,26.455,26.860167
2010,25.502333,21.446417,25.812417,25.46475,25.687667,25.559417,26.22675,25.627333,22.639917,25.9185,27.935,28.108917,23.687583,25.245667,25.050833,26.537,26.52125
2011,25.84275,21.555417,25.428917,24.876583,25.246917,24.775083,26.113417,26.153,22.336667,25.722833,27.59975,27.662083,23.820833,25.47425,24.415583,26.20225,26.640583
2012,25.734417,21.607167,25.717083,24.877,25.381333,24.8735,26.282,26.059,22.445917,25.47825,27.2795,27.636417,23.848167,25.260583,24.640833,26.322083,26.885083
2013,26.213333,20.640875,25.348125,25.307625,25.5795,25.139375,26.457556,26.250444,22.264125,25.941375,27.7175,27.612875,24.291333,25.732333,25.41325,26.467,27.125333


In [19]:
list(yearlyclimatedata.columns)

[('Average Temperature, Celsius', 'Belize'),
 ('Average Temperature, Celsius', 'Bolivia'),
 ('Average Temperature, Celsius', 'Brazil'),
 ('Average Temperature, Celsius', 'Cameroon'),
 ('Average Temperature, Celsius', 'Colombia'),
 ('Average Temperature, Celsius', 'Congo'),
 ('Average Temperature, Celsius', 'Costa Rica'),
 ('Average Temperature, Celsius', 'Cuba'),
 ('Average Temperature, Celsius', 'Ecuador'),
 ('Average Temperature, Celsius', 'Fiji'),
 ('Average Temperature, Celsius', 'Ghana'),
 ('Average Temperature, Celsius', 'Grenada'),
 ('Average Temperature, Celsius', 'Guatemala'),
 ('Average Temperature, Celsius', 'Honduras'),
 ('Average Temperature, Celsius', 'India'),
 ('Average Temperature, Celsius', 'Indonesia'),
 ('Average Temperature, Celsius', 'Jamaica')]

In [25]:
listofdates = daterange["Year"]


plt.title("Average Temperature in Top Cacao Producing Countries, 2006-2013")
plt.xlabel("Year")
plt.ylabel("Temperature, Celsius"

plt.plot(listofdates,yearlyclimatedata[('Average Temperature, Celsius','Belize')], label="Belize")
plt.plot(listofdates,yearlyclimatedata[('Average Temperature, Celsius','Bolivia')] , label="Bolivia")
plt.plot(listofdates,yearlyclimatedata[('Average Temperature, Celsius','Brazil')] , label="Brazil")
plt.grid()
#plt.legend()
#plt.savefig("test.png")
plt.show()

SyntaxError: invalid syntax (<ipython-input-25-22247bb22e68>, line 8)

In [None]:
#pivot table summarizes country temp. information comparing each month every year
monthclimatedata =pd.pivot_table(daterange,values=['Average Temperature, Celsius'],index=['Country'],columns=['Month', 'Year'])
monthclimatedata

In [None]:
list(monthclimatedata.columns)

In [None]:
countryinfo =pd.pivot_table(daterange,values=['Average Temperature, Celsius'],index=['DateTime'],columns=['Country'])
countryinfo

In [None]:
list(countryinfo.columns)

In [None]:
t = np.arange(0.0, 2.0, 0.01)
s = np.sin(2 * np.pi * t)

upper = 30.0
lower = 21.0

supper = np.ma.masked_where(s < upper, s)
slower = np.ma.masked_where(s > lower, s)
smiddle = np.ma.masked_where((s < lower) | (s > upper), s)


In [None]:
fig, ax = plt.subplots()
ax.plot(t, smiddle, t, slower, t, supper)
plt.show()