# Data analysis of Climate change

## I. Create a Database

In [1]:
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
import numpy as np

# First import useful tools for data analysis

Next we need to read three tables: temperatures, stations, and countries.  
We load each of them with `pd.read_csv()`.

In [2]:
temps = pd.read_csv('temps.csv')
# The file has already been downloaded, so reading the local copy is enough.

In [3]:
temps.head(5)

Unnamed: 0,ID,Year,VALUE1,VALUE2,VALUE3,VALUE4,VALUE5,VALUE6,VALUE7,VALUE8,VALUE9,VALUE10,VALUE11,VALUE12
0,ACW00011604,1961,-89.0,236.0,472.0,773.0,1128.0,1599.0,1570.0,1481.0,1413.0,1174.0,510.0,-39.0
1,ACW00011604,1962,113.0,85.0,-154.0,635.0,908.0,1381.0,1510.0,1393.0,1163.0,994.0,323.0,-126.0
2,ACW00011604,1963,-713.0,-553.0,-99.0,541.0,1224.0,1627.0,1620.0,1596.0,1332.0,940.0,566.0,-108.0
3,ACW00011604,1964,62.0,-85.0,55.0,738.0,1219.0,1442.0,1506.0,1557.0,1221.0,788.0,546.0,112.0
4,ACW00011604,1965,44.0,-105.0,38.0,590.0,987.0,1500.0,1487.0,1477.0,1377.0,974.0,31.0,-178.0


As provided, the data set contains the following columns: 

- `ID`: the ID number of the station. We can use this to figure out which country the station is in, as well as the spatial location of the station. 
- `Year`: the year of the measurement. 
- `VALUE1`-`VALUE12`: the temperature measurements themselves. `VALUE1` contains the temperature measurements for January, `VALUE2` for February, and so on. 
- The measurements are in hundredths of a degree, Celsius. 

Data in this wide layout is hard to work with, so we will reshape it so that the readings for all months sit in a single column, with one row per station, year, and month. 

First, we move the `ID` and `Year` columns into a multi-index for the data frame, so that only the twelve `VALUE` columns remain as ordinary columns.

In [4]:
# Move ID and Year into a two-level row index; only VALUE1..VALUE12 stay as columns.
# NOTE: temps is overwritten, so this cell is not re-runnable on its own — after it
# has run, ID and Year are no longer columns. Re-run from the read_csv cell instead.
temps=temps.set_index(keys=["ID","Year"])
temps.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,VALUE1,VALUE2,VALUE3,VALUE4,VALUE5,VALUE6,VALUE7,VALUE8,VALUE9,VALUE10,VALUE11,VALUE12
ID,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
ACW00011604,1961,-89.0,236.0,472.0,773.0,1128.0,1599.0,1570.0,1481.0,1413.0,1174.0,510.0,-39.0
ACW00011604,1962,113.0,85.0,-154.0,635.0,908.0,1381.0,1510.0,1393.0,1163.0,994.0,323.0,-126.0
ACW00011604,1963,-713.0,-553.0,-99.0,541.0,1224.0,1627.0,1620.0,1596.0,1332.0,940.0,566.0,-108.0
ACW00011604,1964,62.0,-85.0,55.0,738.0,1219.0,1442.0,1506.0,1557.0,1221.0,788.0,546.0,112.0
ACW00011604,1965,44.0,-105.0,38.0,590.0,987.0,1500.0,1487.0,1477.0,1377.0,974.0,31.0,-178.0


Then we use the stack() method to pivot the `VALUE` column labels into the rows. Every reading gets its own row, and a new innermost index level holds the former column name. 

In [5]:
temps = temps.stack() 
# The assignment is required: stack() returns a new object and leaves the original unchanged.
temps.head()

ID           Year        
ACW00011604  1961  VALUE1     -89.0
                   VALUE2     236.0
                   VALUE3     472.0
                   VALUE4     773.0
                   VALUE5    1128.0
dtype: float64

And now we can use reset_index() method.

In [6]:
# Turn the index levels (ID, Year and the VALUE label) back into ordinary columns.
temps= temps.reset_index()
temps

Unnamed: 0,ID,Year,level_2,0
0,ACW00011604,1961,VALUE1,-89.0
1,ACW00011604,1961,VALUE2,236.0
2,ACW00011604,1961,VALUE3,472.0
3,ACW00011604,1961,VALUE4,773.0
4,ACW00011604,1961,VALUE5,1128.0
...,...,...,...,...
13992657,ZIXLT622116,1970,VALUE8,1540.0
13992658,ZIXLT622116,1970,VALUE9,2040.0
13992659,ZIXLT622116,1970,VALUE10,2030.0
13992660,ZIXLT622116,1970,VALUE11,2130.0


Now it's time to rename the unhelpful column labels "level_2" and "0".

In [7]:
# "level_2" holds the former column labels (VALUE1..VALUE12); column 0 holds the readings.
temps=temps.rename(columns={"level_2":"Month",0:"temp"})
temps

Unnamed: 0,ID,Year,Month,temp
0,ACW00011604,1961,VALUE1,-89.0
1,ACW00011604,1961,VALUE2,236.0
2,ACW00011604,1961,VALUE3,472.0
3,ACW00011604,1961,VALUE4,773.0
4,ACW00011604,1961,VALUE5,1128.0
...,...,...,...,...
13992657,ZIXLT622116,1970,VALUE8,1540.0
13992658,ZIXLT622116,1970,VALUE9,2040.0
13992659,ZIXLT622116,1970,VALUE10,2030.0
13992660,ZIXLT622116,1970,VALUE11,2130.0


It looks much better. Next we replace the labels VALUE1, VALUE2, ..., VALUE12 with the integers 1, 2, ..., 12, which are the actual month numbers. 

In [8]:
# Strip the 5-character "VALUE" prefix and keep the remaining month number as an int.
# NOTE: not re-runnable on its own — once Month holds integers the .str accessor
# no longer applies.
temps["Month"] = temps["Month"].str[5:].astype(int)

In [9]:
temps

Unnamed: 0,ID,Year,Month,temp
0,ACW00011604,1961,1,-89.0
1,ACW00011604,1961,2,236.0
2,ACW00011604,1961,3,472.0
3,ACW00011604,1961,4,773.0
4,ACW00011604,1961,5,1128.0
...,...,...,...,...
13992657,ZIXLT622116,1970,8,1540.0
13992658,ZIXLT622116,1970,9,2040.0
13992659,ZIXLT622116,1970,10,2030.0
13992660,ZIXLT622116,1970,11,2130.0


We also make the temperatures easier to read by dividing by 100, which converts hundredths of a degree into degrees Celsius.

In [10]:
# Convert hundredths of a degree Celsius into degrees Celsius.
# NOTE: "temp" is overwritten in place, so running this cell a second time divides
# by 100 again. Re-run the notebook from the top rather than repeating this cell.
temps["temp"]  =temps["temp"] / 100
temps

Unnamed: 0,ID,Year,Month,temp
0,ACW00011604,1961,1,-0.89
1,ACW00011604,1961,2,2.36
2,ACW00011604,1961,3,4.72
3,ACW00011604,1961,4,7.73
4,ACW00011604,1961,5,11.28
...,...,...,...,...
13992657,ZIXLT622116,1970,8,15.40
13992658,ZIXLT622116,1970,9,20.40
13992659,ZIXLT622116,1970,10,20.30
13992660,ZIXLT622116,1970,11,21.30


Now we can write this table to our database.

In [11]:
import sqlite3 
# use module sqlite3 to help to create ,edit and query databases

In [12]:
db=sqlite3.connect("climate-data.db")
# Open a connection to climate-data.db; SQLite creates the file if it does not exist yet.

Use .to_sql() method to write this table in the database (db here).  
About .to_sql, check https://pandas.pydata.org/pandas-docs/version/0.23/generated/pandas.DataFrame.to_sql.html

In [13]:
temps.to_sql("temps",db,if_exists="replace",index=False)
# if_exists="replace" overwrites any existing "temps" table, which is fine here
# because the whole table is written in a single call. index=False keeps the
# DataFrame's row index out of the table.

Now we should import stations and countries

In [14]:
url = "https://raw.githubusercontent.com/PhilChodrow/PIC16B/master/datasets/noaa-ghcn/station-metadata.csv"
stations = pd.read_csv(url)
stations.head(5)

Unnamed: 0,ID,LATITUDE,LONGITUDE,STNELEV,NAME
0,ACW00011604,57.7667,11.8667,18.0,SAVE
1,AE000041196,25.333,55.517,34.0,SHARJAH_INTER_AIRP
2,AEM00041184,25.617,55.933,31.0,RAS_AL_KHAIMAH_INTE
3,AEM00041194,25.255,55.364,10.4,DUBAI_INTL
4,AEM00041216,24.43,54.47,3.0,ABU_DHABI_BATEEN_AIR


The data here is good enough to use, next we just write it into our database

In [15]:
stations.to_sql("stations", db, if_exists = "replace", index = False)

Import countries, same as above.

In [16]:
countries_url = "https://raw.githubusercontent.com/mysociety/gaze/master/data/fips-10-4-to-iso-country-codes.csv"
countries = pd.read_csv(countries_url)
countries.head(5)

Unnamed: 0,FIPS 10-4,ISO 3166,Name
0,AF,AF,Afghanistan
1,AX,-,Akrotiri
2,AL,AL,Albania
3,AG,DZ,Algeria
4,AQ,AS,American Samoa


In [17]:
# The table is named "country" (singular), unlike the `countries` DataFrame;
# the SQL queries later in this notebook refer to it by that name.
countries.to_sql("country", db, if_exists = "replace", index = False)

  sql.to_sql(


Now the first part is done; let's check what our database looks like.

In [18]:
# A cursor is the object used to send SQL commands to the database.
cursor = db.cursor()
cursor.execute("SELECT sql FROM sqlite_master WHERE type='table';")
# Every fetched row is a 1-tuple holding the CREATE statement of one table.
table_rows = cursor.fetchall()
for result in table_rows:
    print(result[0])

CREATE TABLE "temps" (
"ID" TEXT,
  "Year" INTEGER,
  "Month" INTEGER,
  "temp" REAL
)
CREATE TABLE "stations" (
"ID" TEXT,
  "LATITUDE" REAL,
  "LONGITUDE" REAL,
  "STNELEV" REAL,
  "NAME" TEXT
)
CREATE TABLE "country" (
"FIPS 10-4" TEXT,
  "ISO 3166" TEXT,
  "Name" TEXT
)


Now that we have finished constructing our database, we need to close the connection.

In [19]:
db.close()

## II. Query Function

We can build a function that makes it much easier to query the data we need, instead of writing complex SQL commands each time.

The function accepts 4 arguments as inputs (country, year_begin, year_end, month)  
and should return a pandas DataFrame containing the following columns:  
###### The station name.  
###### The latitude of the station.  
###### The longitude of the station.  
###### The name of the country in which the station is located.  
###### The year in which the reading was taken.  
###### The month in which the reading was taken.  
###### The average temperature at the specified station during the specified year and month.   

Let's first work out how to get the desired result for an example where the input is country = "India", 
                       year_begin = 1980, 
                       year_end = 2020,
                       month = 1

In [20]:
db=sqlite3.connect("climate-data.db") 
# First reconnect to the database, since the earlier connection was closed.
cursor = db.cursor()
cmd = \
"""
SELECT "FIPS 10-4" FROM country where Name = "India";
"""
# Query the FIPS 10-4 code for India.
cursor.execute(cmd)
# fetchone() returns a single row as a tuple; its first element is the code.
result=cursor.fetchone()
result[0]


'IN'

Next we query the specific data from the tables temps and stations, subject to our conditions.

In [21]:
cmd = \
"""
SELECT S.name,S.LATITUDE,S.LONGITUDE,T.year, T.month, T.temp
FROM temps T
LEFT JOIN stations S ON T.id = S.id
WHERE S.id LIKE 'IN%' AND T.year >=1980 AND T.year<=2020 AND T.month=1
"""
# The LIKE operator in the WHERE clause matches station IDs that begin with
# 'IN', which is the FIPS code of India.

And we use .read_sql_query() to read the result in pandas

In [22]:
result=pd.read_sql_query(cmd, db)

In [23]:
# Always close the connection once we are finished with it.
db.close()
# Preview the query result. An expression is only displayed when it is the last
# statement of a cell, so the preview has to come after db.close().
result.head()

The result is missing the "Country" column; we can add it manually.

In [24]:
# Insert the country name at position 3, i.e. as the column right after LONGITUDE.
# NOTE: insert() modifies result in place, so this cell cannot be run twice
# (the column would already exist).
result.insert(3,'Country',"India")

In [25]:
result

Unnamed: 0,NAME,LATITUDE,LONGITUDE,Country,Year,Month,temp
0,PBO_ANANTAPUR,14.583,77.633,India,1980,1,23.48
1,PBO_ANANTAPUR,14.583,77.633,India,1981,1,24.57
2,PBO_ANANTAPUR,14.583,77.633,India,1982,1,24.19
3,PBO_ANANTAPUR,14.583,77.633,India,1983,1,23.51
4,PBO_ANANTAPUR,14.583,77.633,India,1984,1,24.81
...,...,...,...,...,...,...,...
3147,DARJEELING,27.050,88.270,India,1983,1,5.10
3148,DARJEELING,27.050,88.270,India,1986,1,6.90
3149,DARJEELING,27.050,88.270,India,1994,1,8.10
3150,DARJEELING,27.050,88.270,India,1995,1,5.60


Now this is what we want, writing the related function should be easy.

In [26]:
def query_climate_database(country,year_begin,year_end,month):
    """Return the temperature readings of one country for a month and year range.

    The country name is first translated into its FIPS 10-4 code via the
    ``country`` table; stations belonging to the country are those whose ID
    starts with that code.

    Parameters
    ----------
    country : str
        Country name exactly as stored in the ``Name`` column of ``country``.
    year_begin, year_end : int
        First and last year to include (both inclusive).
    month : int
        Month number of the readings to return.

    Returns
    -------
    pandas.DataFrame
        One row per reading with the station name, latitude, longitude,
        a ``Country`` column (inserted at position 3), year, month and
        temperature.

    Raises
    ------
    ValueError
        If ``country`` is not present in the ``country`` table.
    """
    db=sqlite3.connect("climate-data.db")
    try:
        # Values are bound with "?" placeholders instead of being pasted into
        # the SQL text, so names containing quotes work and cannot alter the query.
        code_row = db.execute(
            'SELECT "FIPS 10-4" FROM country WHERE Name = ?', (country,)
        ).fetchone()
        if code_row is None:
            raise ValueError(f"unknown country name: {country!r}")
        country_code = code_row[0]
        main_query = """
        SELECT S.name,S.LATITUDE,S.LONGITUDE,T.year, T.month, T.temp
        FROM temps T
        LEFT JOIN stations S ON T.id = S.id
        WHERE S.id LIKE ? AND T.year >= ? AND T.year <= ? AND T.month = ?
        """
        # int() also accepts numpy integers, which sqlite3 cannot bind directly.
        params = (f"{country_code}%", int(year_begin), int(year_end), int(month))
        result = pd.read_sql_query(main_query, db, params=params)
    finally:
        # Close the connection even when a query fails.
        db.close()
    result.insert(3,'Country',country)
    return result

Done! We can try the same input to see if we can get the same output above

In [27]:
query_climate_database(country = "India", 
                       year_begin = 1980, 
                       year_end = 2020,
                       month = 1)

Unnamed: 0,NAME,LATITUDE,LONGITUDE,Country,Year,Month,temp
0,PBO_ANANTAPUR,14.583,77.633,India,1980,1,23.48
1,PBO_ANANTAPUR,14.583,77.633,India,1981,1,24.57
2,PBO_ANANTAPUR,14.583,77.633,India,1982,1,24.19
3,PBO_ANANTAPUR,14.583,77.633,India,1983,1,23.51
4,PBO_ANANTAPUR,14.583,77.633,India,1984,1,24.81
...,...,...,...,...,...,...,...
3147,DARJEELING,27.050,88.270,India,1983,1,5.10
3148,DARJEELING,27.050,88.270,India,1986,1,6.90
3149,DARJEELING,27.050,88.270,India,1994,1,8.10
3150,DARJEELING,27.050,88.270,India,1995,1,5.60


## Data Visualization

First we need to import the plotly package.

In [28]:
from plotly import express as px

Again, we first work through the steps without a function. We can use the query_climate_database() function we wrote above to fetch the required data.

In [114]:
temp_vs_data=query_climate_database(country = "India", 
                       year_begin = 1980, 
                       year_end = 2020,
                       month = 1)
temp_vs_data

Unnamed: 0,NAME,LATITUDE,LONGITUDE,Country,Year,Month,temp
0,PBO_ANANTAPUR,14.583,77.633,India,1980,1,23.48
1,PBO_ANANTAPUR,14.583,77.633,India,1981,1,24.57
2,PBO_ANANTAPUR,14.583,77.633,India,1982,1,24.19
3,PBO_ANANTAPUR,14.583,77.633,India,1983,1,23.51
4,PBO_ANANTAPUR,14.583,77.633,India,1984,1,24.81
...,...,...,...,...,...,...,...
3147,DARJEELING,27.050,88.270,India,1983,1,5.10
3148,DARJEELING,27.050,88.270,India,1986,1,6.90
3149,DARJEELING,27.050,88.270,India,1994,1,8.10
3150,DARJEELING,27.050,88.270,India,1995,1,5.60


Now we filter out stations that do not have the minimum required number of years of data.  
Assume the minimum required number is 10.

In [115]:
min_obs=10
# count_year returns the number of rows in a group, i.e. how many readings a station has
def count_year(x):
    """Return the number of elements in ``x`` (used as a group-size aggregator)."""
    n_entries = len(x)
    return n_entries
# Attach to every row the number of readings recorded under its station NAME.
# NOTE(review): NAME does not seem to identify a station uniquely — AKOLA ends up
# with 60 readings although 1980-2020 spans only 41 years — so same-named stations
# are counted together here. Consider grouping by coordinates as well.
temp_vs_data["year_count"]=temp_vs_data.groupby(["NAME"])["Month"].transform(count_year)
temp_vs_data

Unnamed: 0,NAME,LATITUDE,LONGITUDE,Country,Year,Month,temp,year_count
0,PBO_ANANTAPUR,14.583,77.633,India,1980,1,23.48,34
1,PBO_ANANTAPUR,14.583,77.633,India,1981,1,24.57,34
2,PBO_ANANTAPUR,14.583,77.633,India,1982,1,24.19,34
3,PBO_ANANTAPUR,14.583,77.633,India,1983,1,23.51,34
4,PBO_ANANTAPUR,14.583,77.633,India,1984,1,24.81,34
...,...,...,...,...,...,...,...,...
3147,DARJEELING,27.050,88.270,India,1983,1,5.10,7
3148,DARJEELING,27.050,88.270,India,1986,1,6.90,7
3149,DARJEELING,27.050,88.270,India,1994,1,8.10,7
3150,DARJEELING,27.050,88.270,India,1995,1,5.60,7


Now we filter out the stations whose year_count is below 10.

In [116]:
# Keep only stations with at least min_obs readings; use the min_obs threshold
# defined earlier instead of repeating the literal 10.
temp_vs_data_2=temp_vs_data[temp_vs_data["year_count"]>=min_obs]

In [117]:
temp_vs_data_2

Unnamed: 0,NAME,LATITUDE,LONGITUDE,Country,Year,Month,temp,year_count
0,PBO_ANANTAPUR,14.583,77.633,India,1980,1,23.48,34
1,PBO_ANANTAPUR,14.583,77.633,India,1981,1,24.57,34
2,PBO_ANANTAPUR,14.583,77.633,India,1982,1,24.19,34
3,PBO_ANANTAPUR,14.583,77.633,India,1983,1,23.51,34
4,PBO_ANANTAPUR,14.583,77.633,India,1984,1,24.81,34
...,...,...,...,...,...,...,...,...
3140,SHILONG,25.600,91.890,India,1986,1,10.40,11
3141,SHILONG,25.600,91.890,India,1990,1,11.20,11
3142,SHILONG,25.600,91.890,India,2010,1,11.99,11
3143,SHILONG,25.600,91.890,India,2011,1,9.93,11


Now we compute the average year-over-year change in temperature for each station, taken as the slope coefficient of a linear regression of temperature on year.

In [118]:
from sklearn.linear_model import LinearRegression

In [119]:
def coef(data_group):
    """Fit a linear regression of ``temp`` on ``Year`` and return its slope.

    ``data_group`` must provide the columns ``Year`` and ``temp``; the value
    returned is the first (and only) fitted coefficient.
    """
    years = data_group[["Year"]]    # double brackets keep a DataFrame: X must be 2-D
    readings = data_group["temp"]   # single brackets give a Series: y is 1-D
    model = LinearRegression().fit(years, readings)
    return model.coef_[0]

In [120]:
coefs = temp_vs_data_2.groupby(["NAME"]).apply(coef)

In [121]:
coefs=coefs.reset_index()

In [122]:
coefs

Unnamed: 0,NAME,0
0,AGARTALA,-0.006184
1,AHMADABAD,0.006731
2,AKOLA,-0.008063
3,ALLAHABAD,-0.029375
4,ALLAHABAD_BAMHRAULI,-0.015457
...,...,...
92,TRIVANDRUM,0.022892
93,UDAIPUR_DABOK,0.072424
94,VARANASI_BABATPUR,-0.012996
95,VERAVAL,0.024848


Note that we have lost LATITUDE and LONGITUDE here; we can use merge to restore them.

In [123]:
# Re-attach the station metadata: merging on NAME repeats each slope on every
# reading of that name, then drop_duplicates keeps only the first row per NAME.
final=pd.merge(coefs,temp_vs_data_2,how='inner',on = ["NAME"]).drop_duplicates('NAME')

In [124]:
final=final.rename(columns={ 0 :"change"})
final

Unnamed: 0,NAME,change,LATITUDE,LONGITUDE,Country,Year,Month,temp,year_count
0,AGARTALA,-0.006184,23.883,91.250,India,1980,1,18.21,33
33,AHMADABAD,0.006731,23.067,72.633,India,1980,1,20.39,38
71,AKOLA,-0.008063,20.700,77.033,India,1980,1,22.47,60
131,ALLAHABAD,-0.029375,25.441,81.735,India,1982,1,17.47,27
158,ALLAHABAD_BAMHRAULI,-0.015457,25.500,81.900,India,1980,1,15.42,26
...,...,...,...,...,...,...,...,...,...
2981,TRIVANDRUM,0.022892,8.500,77.000,India,1980,1,27.10,25
3006,UDAIPUR_DABOK,0.072424,24.617,73.883,India,2010,1,16.84,10
3016,VARANASI_BABATPUR,-0.012996,25.450,82.867,India,1997,1,15.83,22
3038,VERAVAL,0.024848,20.900,70.367,India,1980,1,21.45,41


Now trying to visualize our data.

In [125]:
# Diverging colour scale, centred on zero further below.
color_map = px.colors.diverging.RdGy_r
fig = px.scatter_mapbox(
    final,
    lat="LATITUDE",
    lon="LONGITUDE",
    hover_name="NAME",
    color="change",
    zoom=2,
    height=300,
    mapbox_style="carto-positron",
    color_continuous_scale=color_map,
)
# Remove the outer margins so the map fills the whole figure.
fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
# Centre the colour axis on 0 and label the colour bar.
fig.update_coloraxes(
    cmid=0,
    colorbar_title_text="Avg Yearly Increase(C)",
    colorbar_title_font_size=10,
)
fig.show()

Now we should be able to finish the function.

In [130]:
def temperature_coefficient_plot(country,year_begin,year_end,month,min_obs,**kwargs):
    """Map the estimated yearly temperature change of each station in a country.

    Parameters
    ----------
    country, year_begin, year_end, month
        Passed on unchanged to ``query_climate_database``.
    min_obs : int
        Minimum number of readings a station needs in order to be plotted.
    **kwargs
        Extra keyword arguments forwarded to ``px.scatter_mapbox``.
        ``height`` defaults to 300 when it is not supplied.

    Returns
    -------
    The plotly figure, with one marker per station coloured by the slope of a
    linear regression of temperature on year.

    Raises
    ------
    ValueError
        If no station has at least ``min_obs`` readings.
    """
    # A station NAME is not unique (several stations can share one), so the
    # coordinates are part of the key that identifies a station.
    station_key = ["NAME", "LATITUDE", "LONGITUDE"]

    readings = query_climate_database(country,year_begin,year_end,month)
    # "size" counts the rows of each group, i.e. the readings per station.
    readings["year_count"] = readings.groupby(station_key)["Month"].transform("size")
    enough = readings[readings["year_count"] >= min_obs]
    if enough.empty:
        raise ValueError(f"no station has at least {min_obs} readings for this query")

    # One regression slope per station; only the columns coef() reads are passed on.
    coefs = (
        enough.groupby(station_key)[["Year", "temp"]]
        .apply(coef)
        .rename("change")
        .reset_index()
    )
    # Merge the other columns back (so kwargs such as hover_data can use them)
    # and keep a single row per station.
    final = pd.merge(coefs, enough, how="inner", on=station_key).drop_duplicates(station_key)

    # Let callers override the height instead of clashing with a fixed keyword.
    kwargs.setdefault("height", 300)
    fig = px.scatter_mapbox(final,
                            lat = "LATITUDE",
                            lon = "LONGITUDE",
                            hover_name = "NAME",
                            color = "change",
                            **kwargs)
    fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
    # Centre the colour scale on zero and label the colour bar.
    fig.update_coloraxes(cmid=0)
    fig.update_coloraxes(colorbar_title_text="Avg Yearly Increase(C)",colorbar_title_font_size=10)
    return fig

In [131]:
color_map = px.colors.diverging.RdGy_r # choose a colormap

fig = temperature_coefficient_plot("India", 1980, 2020, 1, 
                                   min_obs = 10,
                                   zoom = 2,
                                   mapbox_style="carto-positron",
                                   color_continuous_scale=color_map)

fig.show()

## Extra Question 1: Is there any relationship between climate change and LATITUDE for a given month and year range?

First we write a function to query data for an input latitude range (min, max).

In [133]:
def query_climate_database_by_lat(latmin,latmax,year_begin,year_end,month):
    """Return the temperature readings of all stations inside a latitude band.

    Parameters
    ----------
    latmin, latmax : float
        Latitude range; ``latmin`` is inclusive and ``latmax`` is exclusive.
    year_begin, year_end : int
        First and last year to include (both inclusive).
    month : int
        Month number of the readings to return.

    Returns
    -------
    pandas.DataFrame
        One row per reading with the station name, latitude, year, month and
        temperature.

    Raises
    ------
    ValueError
        If an argument cannot be converted to the expected number type.
    """
    main_query = """
    SELECT S.name,S.LATITUDE,T.year, T.month, T.temp
    FROM temps T
    LEFT JOIN stations S ON T.id = S.id
    WHERE S.LATITUDE >= ? AND S.LATITUDE < ? AND T.year >= ? AND T.year <= ? AND T.month = ?
    """
    # Values are bound with "?" placeholders rather than formatted into the SQL
    # text; the conversions reject non-numeric input before any query runs.
    params = (float(latmin), float(latmax), int(year_begin), int(year_end), int(month))
    db=sqlite3.connect("climate-data.db")
    try:
        return pd.read_sql_query(main_query, db, params=params)
    finally:
        # Close the connection even when the query fails.
        db.close()

In [221]:
result=query_climate_database_by_lat(0,10,2000,2010,8)

In [222]:
result

Unnamed: 0,NAME,LATITUDE,Year,Month,temp
0,VICTORIA_POINT,9.967,2001,8,24.42
1,VICTORIA_POINT,9.967,2002,8,23.82
2,SAVE,7.980,2000,8,24.50
3,SAVE,7.980,2001,8,24.40
4,SAVE,7.980,2002,8,25.10
...,...,...,...,...,...
1921,CON_SON,8.683,2001,8,28.26
1922,CON_SON,8.683,2003,8,28.54
1923,CON_SON,8.683,2004,8,28.12
1924,CON_SON,8.683,2008,8,28.16


We also want to filter by the minimum number of years of data.

In [224]:
# Count the readings recorded under each station NAME.
# NOTE(review): SAVE ends up with 19 readings although 2000-2010 spans only 11
# years, so several stations appear to share that name and are counted together.
result["years_of_data"]=result.groupby(["NAME"])['Year'].transform(count_year)

In [225]:
result

Unnamed: 0,NAME,LATITUDE,Year,Month,temp,years_of_data
0,VICTORIA_POINT,9.967,2001,8,24.42,2
1,VICTORIA_POINT,9.967,2002,8,23.82,2
2,SAVE,7.980,2000,8,24.50,19
3,SAVE,7.980,2001,8,24.40,19
4,SAVE,7.980,2002,8,25.10,19
...,...,...,...,...,...,...
1921,CON_SON,8.683,2001,8,28.26,6
1922,CON_SON,8.683,2003,8,28.54,6
1923,CON_SON,8.683,2004,8,28.12,6
1924,CON_SON,8.683,2008,8,28.16,6


In [226]:
result=result[result["years_of_data"]>=10]
result

Unnamed: 0,NAME,LATITUDE,Year,Month,temp,years_of_data
2,SAVE,7.980,2000,8,24.50,19
3,SAVE,7.980,2001,8,24.40,19
4,SAVE,7.980,2002,8,25.10,19
5,SAVE,7.980,2003,8,25.20,19
6,SAVE,7.980,2004,8,24.90,19
...,...,...,...,...,...,...
1915,CA_MAU,9.183,2005,8,29.40,10
1916,CA_MAU,9.183,2007,8,28.01,10
1917,CA_MAU,9.183,2008,8,28.01,10
1918,CA_MAU,9.183,2009,8,28.76,10


Then we calculate the yearly change for each station within this latitude range.

In [227]:
coefs2 = result.groupby(["NAME"]).apply(coef)

In [228]:
coefs2=coefs2.reset_index()

In [229]:
final=pd.merge(coefs2,result,how='inner',on = ["NAME"]).drop_duplicates('NAME')
final=final.rename(columns={ 0 :"change"})
final

Unnamed: 0,NAME,change,LATITUDE,Year,Month,temp,years_of_data
0,ABIDJAN_FELIX_HOUPHOUET_BOIGN,0.084636,5.2610,2000,8,24.48,11
11,ADDIS_ABABA,0.026324,9.0000,2000,8,15.60,18
29,ADDIS_ABABA_BOLE,-0.019069,9.0330,2000,8,15.65,10
39,ANTONIO_ROLDAN_BETANCOURT,0.036629,7.8120,2000,8,27.24,10
49,ARUA,0.128332,3.0170,2000,8,20.68,10
...,...,...,...,...,...,...,...
929,TAGBILARAN,0.033615,9.6640,2000,8,27.66,10
939,TUMEREMO,0.038598,7.3000,2000,8,24.82,10
949,VANGUARDIA,0.121818,4.1680,2000,8,24.19,10
959,YAP_ISLAND_WSO_AP,0.030390,9.4833,2000,8,27.23,10


Then we get the average yearly temperature change for stations between latitudes 0 and 10, using the August readings of the years 2000-2010.

In [232]:
final["change"].mean()

0.01849770446574507

Now let's write the function.

In [233]:
def avg_temp_change_latitude(latmin,latmax,year_begin,year_end,month,min_obs):
    """Return the mean yearly temperature change of the stations in a latitude band.

    Parameters
    ----------
    latmin, latmax, year_begin, year_end, month
        Passed on unchanged to ``query_climate_database_by_lat``.
    min_obs : int
        Minimum number of readings a station needs in order to be included.

    Returns
    -------
    float
        Mean of the per-station regression slopes of temperature on year, or
        NaN when no station has at least ``min_obs`` readings.
    """
    # A station NAME is not unique (several stations can share one), so the
    # latitude is part of the key that identifies a station.
    station_key = ["NAME", "LATITUDE"]

    readings = query_climate_database_by_lat(latmin,latmax,year_begin,year_end,month)
    # "size" counts the rows of each group, i.e. the readings per station.
    readings["years_of_data"] = readings.groupby(station_key)["Year"].transform("size")
    enough = readings[readings["years_of_data"] >= min_obs]
    if enough.empty:
        return float("nan")

    # One slope per station. Averaging them directly gives the same result as
    # merging them back onto the readings and dropping duplicates first.
    slopes = enough.groupby(station_key)[["Year", "temp"]].apply(coef)
    return slopes.mean()

In [None]:
k=[]
for i in range(10)
    inteval=180/10
    l_min=-90+inteval*(i-1)
    l_max=l_min+10
    