### Exploration/Analysis of Minneapolis Winter Weather 1880-2020
Data sourced from the Minnesota Department of Natural Resources (DNR).

In [40]:
#import libraries
import pandas as pd

Start with some data cleaning to prepare for analysis.

In [41]:
# Load the historical MSP weather export.
df = pd.read_csv('data/msp_historical_weather.csv')
# Drop the 'Unnamed: 0' column
# (presumably a row index written out by an earlier to_csv -- TODO confirm).
df = df.drop(columns=['Unnamed: 0'])
# Inspect dtypes and per-column null counts. Only the last expression of a
# cell is rendered automatically, so these checks go through display().
display(df.dtypes)
display(df.isnull().sum())
# Keep index labels 150 and onward.
# NOTE(review): 150 is hard-coded on the assumption that the leading rows are
# the ones without temperature readings -- confirm against the CSV.
df = df.loc[150:]
# Replace the 'T' marker with 0.01
# (presumably "trace" amounts -- confirm against the data source's key).
df = df.replace('T', .01)
# Replace the 'M' (missing) marker with the *string* 'NaN', not a float NaN.
# The later temperature cells filter on != 'NaN', so the sentinel must stay a
# string here; note that df.isnull() will not count these cells as missing.
df = df.replace('M', 'NaN')
# Show one random row of the cleaned frame.
df.sample()

Unnamed: 0,Date,MaxTemp,MinTemp,Precip,Snow,SnowDepth,datetime,day,month,year,season
22625,5/12/1969,64,31,0,0,0,1969-05-12,12,5,1969,Spring


Temperature Trends in Winter

In [43]:
# Carve out the temperature readings together with their calendar fields.
temps = df.loc[
    :,
    ['MaxTemp', 'MinTemp', 'Date', 'day', 'month', 'year', 'season', 'datetime'],
]
# Peek at one random row.
temps.sample()

Unnamed: 0,MaxTemp,MinTemp,Date,day,month,year,season,datetime
51088,48,36,10/3/1980,3,10,1980,Fall,1980-10-03


In [44]:
# Build a numeric-only frame of the daily high/low temperatures.
just_temps = temps[['MaxTemp', 'MinTemp']]
# Rows where either reading carries the 'NaN' string sentinel are dropped.
has_both_readings = (just_temps['MaxTemp'] != 'NaN') & (just_temps['MinTemp'] != 'NaN')
# Convert both columns in one step. apply() returns a fresh DataFrame, so no
# values are assigned onto a filtered slice (which raised SettingWithCopyWarning).
# pd.to_numeric still raises if a remaining value cannot be parsed.
just_temps = just_temps.loc[has_both_readings].apply(pd.to_numeric)
# Confirm both columns are now numeric.
just_temps.dtypes


MaxTemp    int64
MinTemp    int64
dtype: object

In [45]:
def _to_numeric_if_possible(column):
    """Return `column` converted with pd.to_numeric, or unchanged if it cannot be parsed.

    Explicit stand-in for the deprecated ``pd.to_numeric(..., errors='ignore')``.
    """
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        return column


# Drop rows where either temperature reading carries the 'NaN' string sentinel.
has_both_readings = (temps['MaxTemp'] != 'NaN') & (temps['MinTemp'] != 'NaN')
temps = (
    temps.loc[has_both_readings]
    # Strict conversion for the two temperature columns: an unparseable value
    # raises here instead of silently leaving the column as object.
    # assign() builds a new frame, so nothing is written onto a filtered slice.
    .assign(
        MaxTemp=lambda frame: pd.to_numeric(frame['MaxTemp']),
        MinTemp=lambda frame: pd.to_numeric(frame['MinTemp']),
    )
    # Best-effort conversion for every remaining column; columns that cannot
    # be parsed as numbers are left unchanged.
    .apply(_to_numeric_if_possible)
)
# Check dtypes to confirm that the temperature columns are numeric.
temps.dtypes

MaxTemp      int64
MinTemp      int64
Date        object
day          int64
month        int64
year         int64
season      object
datetime    object
dtype: object

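Now add some derived columns to better inform the analysis: the daily average temperature (the midpoint of the high and the low) and the difference between the daily high and low temperatures. Then subset the data to winter, and to January within winter.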

In [47]:
# Add the derived columns on a new frame rather than mutating in place:
#   AveTemp     - midpoint of the daily high and low
#   HighLowDiff - spread between the daily high and low
temps = temps.assign(
    AveTemp=(temps['MaxTemp'] + temps['MinTemp']) / 2,
    HighLowDiff=temps['MaxTemp'] - temps['MinTemp'],
)
# Only the last expression of a cell is rendered automatically, so the
# intermediate samples are shown explicitly with display().
display(temps.sample(5))
# Subset to rows labelled 'Winter' in the season column.
winter = temps[temps['season'] == 'Winter']
display(winter.sample(5))
# Narrow the winter subset down to January (month == 1).
jan = winter[winter['month'] == 1]
# Show five random January rows.
jan.sample(5)

Unnamed: 0,MaxTemp,MinTemp,Date,day,month,year,season,datetime,AveTemp,HighLowDiff
8212,30,1,1/1/1962,1,1,1962,Winter,1962-01-01,15.5,29
8942,6,-9,1/9/1970,9,1,1970,Winter,1970-01-09,-1.5,15
3088,11,0,1/21/1905,21,1,1905,Winter,1905-01-21,5.5,11
6698,32,22,1/21/1945,21,1,1945,Winter,1945-01-21,27.0,10
3274,7,-10,1/27/1907,27,1,1907,Winter,1907-01-27,-1.5,17


In [48]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns of the winter subset. The calendar fields day/month/year are numeric
# too, so they appear in the table alongside the temperature columns.
winter.describe()

Unnamed: 0,MaxTemp,MinTemp,day,month,year,AveTemp,HighLowDiff
count,13387.0,13387.0,13387.0,13387.0,13387.0,13387.0,13387.0
mean,25.041159,8.586987,15.570628,5.107642,1946.335101,16.814073,16.454172
std,13.136369,14.506678,8.72876,5.019043,42.824798,13.325749,7.464146
min,-20.0,-41.0,1.0,1.0,1872.0,-29.0,1.0
25%,16.0,-2.0,8.0,1.0,1909.0,7.5,11.0
50%,27.0,9.0,16.0,2.0,1946.0,18.0,16.0
75%,35.0,20.0,23.0,12.0,1983.0,27.0,21.0
max,68.0,49.0,31.0,12.0,2020.0,53.5,57.0
