# 23장 판다스로 데이터 탐험하기

<table align="left"><tr><td>
<a href="https://colab.research.google.com/github/rickiepark/python4daml/blob/main/23장.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="코랩에서 실행하기"/></a>
</td></tr></table>

In [153]:
import matplotlib.pyplot as plt

#선 두께
plt.rcParams['lines.linewidth'] = 4
#제목 폰트 크기
plt.rcParams['axes.titlesize'] = 16
#축 레이블 폰트 크기
plt.rcParams['axes.labelsize'] = 16
#x 축 텍스트 크기
plt.rcParams['xtick.labelsize'] = 16
#y 축 텍스트 크기
plt.rcParams['ytick.labelsize'] = 16
#x 축 눈금 크기
plt.rcParams['xtick.major.size'] = 7
#y 축 눈금 크기
plt.rcParams['ytick.major.size'] = 7
#마커 크기
plt.rcParams['lines.markersize'] = 10
#범례를 표시할 때 마커 표시 개수
plt.rcParams['legend.numpoints'] = 1
#범례 텍스트 크기
plt.rcParams['legend.fontsize'] = 14

# 한글 폰트 사용을 위한 코드입니다.
import sys
# 코랩의 경우 나눔 폰트를 설치합니다.
if 'google.colab' in sys.modules:
    !sudo apt-get -qq -y install fonts-nanum
    import matplotlib.font_manager as fm
    font_files = fm.findSystemFonts(fontpaths=['/usr/share/fonts/truetype/nanum'])
    for fpath in font_files:
        fm.fontManager.addfont(fpath)

# 나눔 폰트를 사용합니다.
import matplotlib

matplotlib.rc('font', family='NanumBarunGothic')
matplotlib.rcParams['axes.unicode_minus'] = False

# 코랩의 경우 필요한 데이터를 깃허브에서 다운로드합니다.
if 'google.colab' in sys.modules:
    import gdown
    gdown.download('https://raw.githubusercontent.com/rickiepark/python4daml/main/wwc2019_q-f.csv', quiet=False)
    gdown.download('https://raw.githubusercontent.com/rickiepark/python4daml/main/global-fossil-fuel-consumption.csv', quiet=False)
    gdown.download('https://raw.githubusercontent.com/rickiepark/python4daml/main/US_temperatures.csv', quiet=False)

Downloading...
From: https://raw.githubusercontent.com/rickiepark/python4daml/main/wwc2019_q-f.csv
To: /content/wwc2019_q-f.csv
259B [00:00, 445kB/s]                    
Downloading...
From: https://raw.githubusercontent.com/rickiepark/python4daml/main/global-fossil-fuel-consumption.csv
To: /content/global-fossil-fuel-consumption.csv
2.25kB [00:00, 3.76MB/s]                   
Downloading...
From: https://raw.githubusercontent.com/rickiepark/python4daml/main/US_temperatures.csv
To: /content/US_temperatures.csv
2.42MB [00:00, 17.6MB/s]                          


## 23.1 데이터프레임과 CSV 파일

In [154]:
import pandas as pd
wwc = pd.read_csv('wwc2019_q-f.csv')
print(wwc)

           Round       Winner  W Goals        Loser  L Goals
0       Quarters      England        3       Norway        0
1       Quarters          USA        2       France        1
2       Quarters  Netherlands        2        Italy        0
..           ...          ...      ...          ...      ...
5          Semis  Netherlands        1       Sweden        0
6      3rd Place       Sweden        2      England        1
7   Championship          USA        2  Netherlands        0

[8 rows x 5 columns]


In [155]:
for i in wwc.index:
    print(i)

0
1
2
3
4
5
6
7


In [156]:
for c in wwc.columns:
    print(c)

Round
Winner
W Goals
Loser
L Goals


In [157]:
print(wwc.values)

[['Quarters' 'England' 3 'Norway' 0]
 ['Quarters' 'USA' 2 'France' 1]
 ['Quarters' 'Netherlands' 2 'Italy' 0]
 ['Quarters' 'Sweden' 2 'Germany' 1]
 ['Semis' 'USA' 2 'England' 1]
 ['Semis' 'Netherlands' 1 'Sweden' 0]
 ['3rd Place' 'Sweden' 2 'England' 1]
 ['Championship' 'USA' 2 'Netherlands' 0]]


In [158]:
wwc.shape

(8, 5)

## 23.2 시리즈와 데이터프레임 만들기

In [159]:
print(pd.DataFrame())

Empty DataFrame
Columns: []
Index: []


In [160]:
rounds = ['Semis', 'Semis', '3rd Place', 'Championship']
print(pd.DataFrame(rounds))

              0
0         Semis
1         Semis
2     3rd Place
3  Championship


In [161]:
print(pd.DataFrame({'Round': rounds}))

          Round
0         Semis
1         Semis
2     3rd Place
3  Championship


In [162]:
rounds = ['Semis', 'Semis', '3rd Place', 'Championship']
teams = ['USA', 'Netherlands', 'Sweden', 'USA']
df = pd.DataFrame({'Round': rounds, 'Winner': teams})
print(df)

          Round       Winner
0         Semis          USA
1         Semis  Netherlands
2     3rd Place       Sweden
3  Championship          USA


In [163]:
df['W Goals'] = [2, 1, 0, 0]
print(df)

          Round       Winner  W Goals
0         Semis          USA        2
1         Semis  Netherlands        1
2     3rd Place       Sweden        0
3  Championship          USA        0


In [164]:
df['W Goals'] = [2, 1, 2, 2]
print(df)

          Round       Winner  W Goals
0         Semis          USA        2
1         Semis  Netherlands        1
2     3rd Place       Sweden        2
3  Championship          USA        2


In [165]:
print(df.drop('Winner', axis = 'columns'))

          Round  W Goals
0         Semis        2
1         Semis        1
2     3rd Place        2
3  Championship        2


In [166]:
quarters_dict = {'Round': ['Quarters']*4,
                 'Winner': ['England', 'USA', 'Netherlands', 'Sweden'],
                 'W Goals': [3, 2, 2, 2]}
df = pd.concat([pd.DataFrame(quarters_dict), df], sort = False)
print(df)

           Round       Winner  W Goals
0       Quarters      England        3
1       Quarters          USA        2
2       Quarters  Netherlands        2
..           ...          ...      ...
1          Semis  Netherlands        1
2      3rd Place       Sweden        2
3   Championship          USA        2

[8 rows x 3 columns]


In [167]:
pd.concat([pd.DataFrame(quarters_dict), df], sort = True)

Unnamed: 0,Round,W Goals,Winner
0,Quarters,3,England
1,Quarters,2,USA
2,Quarters,2,Netherlands
...,...,...,...
1,Semis,1,Netherlands
2,3rd Place,2,Sweden
3,Championship,2,USA


In [168]:
df.reset_index(drop = True)

Unnamed: 0,Round,Winner,W Goals
0,Quarters,England,3
1,Quarters,USA,2
2,Quarters,Netherlands,2
...,...,...,...
5,Semis,Netherlands,1
6,3rd Place,Sweden,2
7,Championship,USA,2


In [169]:
df.reset_index(drop = False)

Unnamed: 0,index,Round,Winner,W Goals
0,0,Quarters,England,3
1,1,Quarters,USA,2
2,2,Quarters,Netherlands,2
...,...,...,...,...
5,1,Semis,Netherlands,1
6,2,3rd Place,Sweden,2
7,3,Championship,USA,2


In [170]:
df.set_index('Round')

Unnamed: 0_level_0,Winner,W Goals
Round,Unnamed: 1_level_1,Unnamed: 2_level_1
Quarters,England,3
Quarters,USA,2
Quarters,Netherlands,2
...,...,...
Semis,Netherlands,1
3rd Place,Sweden,2
Championship,USA,2


## 23.3 열과 행 선택하기

In [171]:
wwc['Winner']

Unnamed: 0,Winner
0,England
1,USA
2,Netherlands
...,...
5,Netherlands
6,Sweden
7,USA


In [172]:
winners = ''
for w in wwc['Winner']:
    winners += w + ','
print(winners[:-1])

England,USA,Netherlands,Sweden,USA,Netherlands,Sweden,USA


**뇌풀기 문제**

In [173]:
def sum_of_columns(df, col_label):
    """df는 데이터프레임이고 col_label은 정수 타입 열의 레이블이라고 가정합니다.
       col_label 열 값의 합을 반환합니다"""
    sum = 0
    for v in df[col_label]:
        sum += v
    return sum

sum_of_columns(df, 'W Goals')

16

In [174]:
wwc[['Winner', 'Loser']]

Unnamed: 0,Winner,Loser
0,England,Norway
1,USA,France
2,Netherlands,Italy
...,...,...
5,Netherlands,Sweden
6,Sweden,England
7,USA,Netherlands


In [175]:
wwc[['Round','Winner','Loser','W Goals','L Goals']]

Unnamed: 0,Round,Winner,Loser,W Goals,L Goals
0,Quarters,England,Norway,3,0
1,Quarters,USA,France,2,1
2,Quarters,Netherlands,Italy,2,0
...,...,...,...,...,...
5,Semis,Netherlands,Sweden,1,0
6,3rd Place,Sweden,England,2,1
7,Championship,USA,Netherlands,2,0


In [176]:
wwc[1:2]

Unnamed: 0,Round,Winner,W Goals,Loser,L Goals
1,Quarters,USA,2,France,1


In [177]:
print(wwc[1:2])

      Round Winner  W Goals   Loser  L Goals
1  Quarters    USA        2  France        1


### 23.3.1 loc와 iloc를 사용하여 선택하기

In [178]:
print(wwc.loc[3])

Round      Quarters
Winner       Sweden
W Goals           2
Loser       Germany
L Goals           1
Name: 3, dtype: object


In [179]:
print(wwc.loc[[1,3,5]])

      Round       Winner  W Goals    Loser  L Goals
1  Quarters          USA        2   France        1
3  Quarters       Sweden        2  Germany        1
5     Semis  Netherlands        1   Sweden        0


In [180]:
print(wwc.loc[3:7:2])

          Round       Winner  W Goals        Loser  L Goals
3      Quarters       Sweden        2      Germany        1
5         Semis  Netherlands        1       Sweden        0
7  Championship          USA        2  Netherlands        0


In [181]:
print(wwc.loc[6:])

          Round  Winner  W Goals        Loser  L Goals
6     3rd Place  Sweden        2      England        1
7  Championship     USA        2  Netherlands        0


In [182]:
print(wwc.loc[:2])

      Round       Winner  W Goals   Loser  L Goals
0  Quarters      England        3  Norway        0
1  Quarters          USA        2  France        1
2  Quarters  Netherlands        2   Italy        0


**뇌풀기 문제**

In [183]:
wwc.loc[::2]

Unnamed: 0,Round,Winner,W Goals,Loser,L Goals
0,Quarters,England,3,Norway,0
2,Quarters,Netherlands,2,Italy,0
4,Semis,USA,2,England,1
6,3rd Place,Sweden,2,England,1


In [184]:
print(wwc.loc[0:2, 'Round':'L Goals':2])

      Round  W Goals  L Goals
0  Quarters        3        0
1  Quarters        2        1
2  Quarters        2        0


**뇌풀기 문제**

In [185]:
print(wwc.loc[1:2])

      Round       Winner  W Goals   Loser  L Goals
1  Quarters          USA        2  France        1
2  Quarters  Netherlands        2   Italy        0


In [186]:
wwc_by_round = wwc.set_index('Round')
print(wwc_by_round)

                   Winner  W Goals        Loser  L Goals
Round                                                   
Quarters          England        3       Norway        0
Quarters              USA        2       France        1
Quarters      Netherlands        2        Italy        0
...                   ...      ...          ...      ...
Semis         Netherlands        1       Sweden        0
3rd Place          Sweden        2      England        1
Championship          USA        2  Netherlands        0

[8 rows x 4 columns]


In [187]:
print(wwc_by_round.loc['Semis'])

            Winner  W Goals    Loser  L Goals
Round                                        
Semis          USA        2  England        1
Semis  Netherlands        1   Sweden        0


In [188]:
print(wwc_by_round.loc[['Semis', 'Championship']])

                   Winner  W Goals        Loser  L Goals
Round                                                   
Semis                 USA        2      England        1
Semis         Netherlands        1       Sweden        0
Championship          USA        2  Netherlands        0


In [189]:
print(wwc_by_round.loc['Quarters':'Semis':2])

               Winner  W Goals    Loser  L Goals
Round                                           
Quarters      England        3   Norway        0
Quarters  Netherlands        2    Italy        0
Semis             USA        2  England        1


### 23.3.2 그룹 선택하기

In [190]:
grouped_by_round = wwc.groupby('Round')
print(grouped_by_round.sum())

                                   Winner  W Goals                     Loser  \
Round                                                                          
3rd Place                          Sweden        2                   England   
Championship                          USA        2               Netherlands   
Quarters      EnglandUSANetherlandsSweden        9  NorwayFranceItalyGermany   
Semis                      USANetherlands        3             EnglandSweden   

              L Goals  
Round                  
3rd Place           1  
Championship        0  
Quarters            2  
Semis               1  


In [191]:
print(wwc.groupby('Winner').mean())

TypeError: agg function failed [how->mean,dtype->object]

In [None]:
print(wwc.groupby(['Loser', 'Round']).mean())

### 23.3.3 내용으로 선택하기

In [None]:
print(wwc.loc[wwc['Winner'] == 'Sweden'])

In [None]:
print(wwc.loc[(wwc['Winner'] == 'Sweden') | (wwc['Loser'] == 'Sweden')])

**뇌풀기 문제**

In [None]:
wwc.loc[((wwc['Winner'] == 'USA') & (wwc['Loser'] != 'France')) |
        ((wwc['Winner'] == 'France') & (wwc['Loser'] != 'USA'))]

In [None]:
def get_country(df, country):
    """df는 Winner와 Loser 열을 가진 데이터프레임이고, country는 문자열입니다.
       Winner나 Loser 열에 country가 나타난 행을 모두 담은 데이터프레임을 반환합니다"""
    return df.loc[(df['Winner'] == country) | (df['Loser'] == country)]

In [None]:
get_country(get_country(wwc, 'Sweden'),'Germany')

In [None]:
def get_games(df, countries):
    return df[(df['Winner'].isin(countries)) |
              (df['Loser'].isin(countries))]

**뇌풀기 문제**

In [None]:
wwc[((wwc['Winner'] == 'Sweden') & (wwc['Loser'].isin(['Germany', 'Netherlands']))) |
    ((wwc['Loser'] == 'Sweden') & (wwc['Winner'].isin(['Germany', 'Netherlands'])))]

## 데이터프레임 조작하기

In [None]:
2*wwc['W Goals']

In [None]:
(wwc[wwc['Winner'] == 'Sweden']['W Goals'].sum() +
 wwc[wwc['Loser'] == 'Sweden']['L Goals'].sum())

In [None]:
(wwc['W Goals'].sum() - wwc['L Goals'].sum())/len(wwc['W Goals'])

**뇌풀기 문제**

In [None]:
wwc['W Goals'].sum() + wwc['L Goals'].sum()

**뇌풀기 문제**

In [None]:
wwc[wwc['Round'] == 'Quarters']['L Goals'].sum()

In [None]:
#wwc에 새로운 열 추가
wwc['G Diff'] = wwc['W Goals'] - wwc['L Goals']
#새로운 행을 위한 딕셔너리 만들기
new_row_dict = {'Round': ['Total'],
                'W Goals': [wwc['W Goals'].sum()],
                'L Goals': [wwc['L Goals'].sum()],
                'G Diff': [wwc['G Diff'].sum()]}
#딕셔너리로 데이터프레임을 만들고 wwc와 합치기
new_row = pd.DataFrame(new_row_dict)
wwc = pd.concat([wwc, new_row], sort = False).reset_index(drop = True)
print(wwc)

In [None]:
#print(wwc.loc[wwc['Round'] != 'Total'].corr(method = 'pearson'))
print(
    wwc.loc[wwc["Round"] != "Total"]
       .corr(numeric_only=True, method="pearson")
)

## 23.5 확장 예제

### 23.5.1 온도 데이터

In [None]:
pd.set_option('display.max_rows', 6)
pd.set_option('display.max_columns', 5)
temperatures = pd.read_csv('US_temperatures.csv')
print(temperatures)

In [None]:
temperatures.loc[temperatures['Date']==19790812][['New York','Tampa']]

**뇌풀기 문제**

In [None]:
temperatures.loc[temperatures['Date']==19790812]['Phoenix'] > \
temperatures.loc[temperatures['Date']==19790812]['Tampa']

**뇌풀기 문제**

In [None]:
temperatures.loc[temperatures['Phoenix'] == 41.4]['Date']

In [None]:
temperatures['Max T'] = temperatures.max(axis = 'columns')
temperatures['Min T'] = temperatures.min(axis = 'columns')
temperatures['Mean T'] = round(temperatures.mean(axis = 'columns'), 2)
print(temperatures.loc[temperatures['Date']==20000704])

In [None]:
temperatures = temperatures.drop(['Max T', 'Min T', 'Mean T'], axis='columns')
temperatures.set_index('Date', drop = True, inplace = True)
temperatures['Max T'] = temperatures.max(axis = 'columns')
temperatures['Min T'] = temperatures.min(axis = 'columns')
temperatures['Mean T'] = round(temperatures.mean(axis = 'columns'), 2)
print(temperatures.loc[20000704:20000704])

In [None]:
plt.figure(figsize = (14, 3)) #피겨 크기 지정
plt.plot(list(temperatures['Mean T']))
plt.title('미국 21개 도시의 평균 온도')
plt.xlabel('1961/1/1부터 날짜')
plt.ylabel('온도 (C)')
plt.show()

In [None]:
plt.figure(figsize = (14, 3)) #피겨 크기 지정
plt.plot(list(temperatures['Mean T'])[0:3*365])
plt.title('미국 21개 도시의 평균 온도')
plt.xlabel('1961/1/1부터 날짜')
plt.ylabel('온도 (C)')
plt.show()

예제 23-2 연도와 온도 데이터를 매핑한 딕셔너리 만들기

In [None]:
def get_dict(temperatures, labels):
    """temperatures는 데이터프레임입니다. 인덱스는 yyyymmdd 형태의 날짜를 나타내는 정수입니다.
       labels은 열 레이블의 리스트입니다.
       연도가 키이고, 값은 딕셔너리입니다.
       이 딕셔너리는 열 레이블이 키이고 각 열의 일자별 온도 리스트가 값입니다"""
    year_dict = {}
    for index, row in temperatures.iterrows():
        year = str(index)[0:4]
        try:
            for col in labels:
                year_dict[year][col].append(row[col])
        except:
            year_dict[year] = {col:[] for col in labels}
            for col in labels:
                year_dict[year][col].append(row[col])
    return year_dict

예제 23-3 연도별 데이터프레임 만들기

In [None]:
import numpy as np

temperatures = pd.read_csv('US_temperatures.csv')
temperatures.set_index('Date', drop = True, inplace = True)
temperatures['Mean T'] = round(temperatures.mean(axis = 'columns'), 2)
temperatures['Max T'] = temperatures.max(axis = 'columns')
temperatures['Min T'] = temperatures.min(axis = 'columns')
yearly_dict = get_dict(temperatures, ['Max T', 'Min T', 'Mean T'])
years, mins, maxes, means = [], [], [], []
for y in yearly_dict:
    years.append(y)
    mins.append(min(yearly_dict[y]['Min T']))
    maxes.append(max(yearly_dict[y]['Max T']))
    means.append(round(np.mean(yearly_dict[y]['Mean T']), 2))

yearly_temps = pd.DataFrame({'Year': years, 'Min T': mins,
                             'Max T': maxes, 'Mean T': means})
print(yearly_temps)

예제 23-4 연도별 온도 그래프 그리기

그림 23-2 평균 연간 온도와 최소 연간 온도

In [None]:
plt.figure(0)
plt.plot(yearly_temps['Year'], yearly_temps['Mean T'])
plt.title('미국 21개 도시의 연간 평균 온도')
plt.figure(1)
plt.plot(yearly_temps['Year'], yearly_temps['Min T'])
plt.title('미국 21개 도시의 연간 최저 온도')
for i in range(2):
    plt.figure(i)
    plt.xticks(range(0, len(yearly_temps), 4),
               rotation = 'vertical', size = 'large')
    plt.ylabel('온도 (C)')

그림 23-3 최소 온도의 이동 평균

In [None]:
plt.plot(yearly_temps['Year'], yearly_temps['Min T'].rolling(7).mean())
plt.title('미국 21개 도시의 연간 최소 온도의 이동평균')
plt.xticks(range(0, len(yearly_temps), 4),
           rotation = 'vertical', size = 'large')
plt.ylabel('온도 (C)')
plt.show()

In [None]:
num_years = 7
for label in ['Min T', 'Max T', 'Mean T']:
    yearly_temps[label] = yearly_temps[label].rolling(num_years).mean()
yearly_temps['Year'] = yearly_temps['Year'].apply(int)
print(yearly_temps.corr())

In [None]:
def r_squared(measured, predicted):
    """measured는 측정 값을 담은 1차원 배열이고
       predicted는 예측 값을 담은 1차원 배열이라고 가정합니다.
       결정 계수를 반환합니다"""
    estimated_error = ((predicted - measured)**2).sum()
    mean_of_measured = measured.sum()/len(measured)
    variability = ((measured - mean_of_measured)**2).sum()
    return 1 - estimated_error/variability

In [None]:
indices = np.isfinite(yearly_temps['Mean T'])
model = np.polyfit(list(yearly_temps['Year'][indices]),
                   list(yearly_temps['Mean T'][indices]), 1)
print(r_squared(yearly_temps['Mean T'][indices],
                np.polyval(model, yearly_temps['Year'][indices])))

**뇌풀기 문제**

In [None]:
years_arr = np.array(years).astype(int)
yr_means_arr = np.array(means)
model = np.polyfit(years_arr, yr_means_arr, 1)
print(r_squared(yr_means_arr, np.polyval(model, years_arr)))

예제 23-5 도시별 평균 온도

In [None]:
temperatures = pd.read_csv('US_temperatures.csv')
temperatures.drop('Date', axis = 'columns', inplace = True)
means = round(temperatures.mean(), 2)
maxes = temperatures.max()
mins = temperatures.min()
city_temps = pd.DataFrame({'Min T':mins, 'Max T':maxes,
                           'Mean T':means})
city_temps = city_temps.apply(lambda x: 1.8*x + 32)
city_temps['Max-Min'] = city_temps['Max T'] -city_temps['Min T']
print(city_temps.sort_values('Mean T', ascending = False).to_string())

그림 23-4 도시별 온도 차이

In [None]:
plt.plot(city_temps.sort_values('Max-Min', ascending=False)['Min T'],
         'b^', label = 'Min T')
plt.plot(city_temps.sort_values('Max-Min', ascending=False)['Max T'],
         'kx', label = 'Max T')
plt.plot(city_temps.sort_values('Max-Min', ascending=False)['Mean T'],
         'ro', label = 'Mean T')
plt.xticks(rotation = 'vertical')
plt.legend()
plt.title('1961-2015년 사이의 최대/최저 온도')
plt.ylabel('온도 (F)')
plt.show()

### 23.5.2 화석 연료 소비량

In [None]:
emissions = pd.read_csv('global-fossil-fuel-consumption.csv')
print(emissions)

In [None]:
emissions['Fuels'] = emissions.sum(axis = 'columns')
emissions.drop(['Coal', 'Crude Oil', 'Natural Gas'], axis = 'columns',
               inplace = True)
num_years = 5
emissions['Roll F'] = emissions['Fuels'].rolling(num_years).mean()
emissions = emissions.round()

그림 23-5 전 세계 화석 연료 소비량

In [None]:
plt.plot(emissions['Year'], emissions['Fuels'],
         label = 'Consumption')
plt.plot(emissions['Year'], emissions['Roll F'],
         label = str(num_years) + ' Year Rolling Ave.')
plt.legend()
plt.title('화석 연료 소비량')
plt.xlabel('연도')
plt.ylabel('소비량')
plt.show()

In [None]:
temperatures = pd.read_csv('US_temperatures.csv')
temperatures.set_index('Date', drop = True, inplace = True)
temperatures['Mean T'] = round(temperatures.mean(axis = 'columns'), 2)
temperatures['Max T'] = temperatures.max(axis = 'columns')
temperatures['Min T'] = temperatures.min(axis = 'columns')
yearly_dict = get_dict(temperatures, ['Max T', 'Min T', 'Mean T'])
years, mins, maxes, means = [], [], [], []
for y in yearly_dict:
    years.append(y)
    mins.append(min(yearly_dict[y]['Min T']))
    maxes.append(max(yearly_dict[y]['Max T']))
    means.append(round(np.mean(yearly_dict[y]['Mean T']), 2))

yearly_temps = pd.DataFrame({'Year': years, 'Min T': mins,
                             'Max T': maxes, 'Mean T': means})
print(yearly_temps)

In [None]:
yearly_temps['Year'] = yearly_temps['Year'].astype(int)
merged_df = pd.merge(yearly_temps, emissions,
                     left_on = 'Year', right_on = 'Year')
print(merged_df)

In [None]:
print(merged_df.corr().round(2).to_string())