# First, let's take a look at what our dataset looks like.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the elnino.csv dataset from the input directory into a DataFrame.
df = pd.read_csv("../input/elnino.csv")

# Column headers may carry leading/trailing whitespace; trim each name so the
# columns can be addressed by their clean labels in later cells.
df.columns = df.columns.map(str.strip)

# Preview the first few rows.
df.head()

In [None]:
# Summary statistics for Air Temp.
# Coerce the column to numeric first: entries that cannot be parsed as numbers
# become NaN, which describe() leaves out of its statistics.
df['Air Temp'] = df['Air Temp'].pipe(pd.to_numeric, errors='coerce')
df['Air Temp'].describe()

In [None]:
# Summary statistics for Sea Surface Temp.
# Same treatment as any text-typed numeric column: unparseable entries are
# turned into NaN before the column is summarised.
df['Sea Surface Temp'] = df['Sea Surface Temp'].pipe(pd.to_numeric, errors='coerce')
df['Sea Surface Temp'].describe()

# It's often easier to see relationships visually. Let's see if there's any relationship between Air Temp and Sea Surface Temp using a Seaborn joint scatterplot. 

In [None]:
# Joint plot of Air Temp against Sea Surface Temp.
# `height` controls the (square) figure size. It replaced the older `size`
# keyword in seaborn 0.9; current releases no longer treat `size` as the
# figure size, so the old spelling fails or is misinterpreted.
sns.jointplot(x="Air Temp", y="Sea Surface Temp", data=df, height=7)

# Other ideas to explore:  
* How do the variables relate to each other?
* Which variables have a greater effect on the climate variations?
* Does the amount of movement of the buoy affect the reliability of the data?

# We might find more relationships by looking at a Seaborn heatmap.

In [None]:
# Build a purely numeric frame for the correlation heat map:
#   1. discard the identifier/date columns,
#   2. coerce every remaining column to numeric (unparseable entries -> NaN),
#   3. keep only rows without any NaN.
df_num = (
    df.drop(columns=['Observation', 'Year', 'Month', 'Day', 'Date'])
      .apply(pd.to_numeric, errors='coerce')
      .dropna()
)

# Annotated heat map of the pairwise correlations between the remaining columns.
sns.heatmap(
    df_num.corr(),
    annot=True,
    cmap="PuBuGn",
    square=True,
    vmax=1.0,
    linewidths=0.25,
    linecolor='k',
)

# Based on the heat map, let's look at other joint plots.

In [None]:
# Joint plot of Zonal Winds against Sea Surface Temp on the cleaned numeric frame.
# `height` (formerly `size`, renamed in seaborn 0.9) sets the square figure size.
sns.jointplot(x="Zonal Winds", y="Sea Surface Temp", data=df_num, height=7)

In [None]:
# Joint plot of Air Temp against Humidity on the cleaned numeric frame.
# `height` (formerly `size`, renamed in seaborn 0.9) sets the square figure size.
sns.jointplot(x="Air Temp", y="Humidity", data=df_num, height=7)