In [24]:
# predicts precipitation based on temperature, humidity, etc
import math
import os
import time
from collections import namedtuple
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from sklearn import preprocessing, cross_validation, svm
from sklearn.cross_decomposition import PLSRegression
from sklearn.linear_model import LinearRegression

In [25]:
# Weather Underground credentials and endpoint.
# The key is taken from the WUNDERGROUND_API_KEY environment variable so that it
# does not have to live in the notebook; the literal fallback keeps the
# notebook's original behaviour when the variable is not set.
# NOTE(review): the fallback key is stored in plain text -- rotate it and drop
# the fallback once the environment variable is configured.
API_KEY = os.environ.get('WUNDERGROUND_API_KEY', '757257ae87413bf3')  # key to access weather data
# URL template; the two placeholders are filled, in order, with the API key and
# the requested date formatted as YYYYMMDD.
BASE_URL = "http://api.wunderground.com/api/{}/history_{}/q/WA/Seattle.json"

In [38]:
# First calendar day requested from the history endpoint.
start_date = datetime(year=2017, month=12, day=26)

# Column names, in order. "date" comes first; the remaining names are the keys
# read from each daily summary returned by the API.
features = [
    "date",
    "meantempm",
    "meandewptm",
    "meanpressurem",
    "maxhumidity",
    "minhumidity",
    "maxtempm",
    "mintempm",
    "maxdewptm",
    "mindewptm",
    "maxpressurem",
    "minpressurem",
    "precipm",
]

# Record type holding one day's values for the fields listed above.
DailySummary = namedtuple(typename="DailySummary", field_names=features)

In [39]:
def extract_weather_data(url, api_key, start_date, days, pause_seconds=6):
    """Download one daily weather summary per day for ``days`` consecutive days.

    Parameters
    ----------
    url : str
        URL template with two ``{}`` placeholders, filled in order with
        ``api_key`` and the requested date formatted as ``YYYYMMDD``.
    api_key : str
        Key substituted into the first placeholder of ``url``.
    start_date : datetime
        First day to request; each following request is one day later.
    days : int
        Number of consecutive days to request.
    pause_seconds : int or float, optional
        Delay between consecutive requests. Defaults to 6, the fixed delay
        the function has always used.

    Returns
    -------
    list of DailySummary
        One record per day whose request returned HTTP 200, in date order.
        Days answered with any other status code are skipped.

    Raises
    ------
    KeyError, IndexError
        If a 200 response lacks the ``history`` / ``dailysummary`` structure
        or one of the expected summary fields.
    """
    records = []
    for offset in range(days):
        day = start_date + timedelta(days=offset)
        # Build the request from the arguments the caller passed in; the
        # module-level BASE_URL / API_KEY are deliberately not consulted here.
        request = url.format(api_key, day.strftime('%Y%m%d'))
        response = requests.get(request)
        if response.status_code == 200:
            data = response.json()['history']['dailysummary'][0]
            # Every field except 'date' is copied verbatim from the payload.
            values = {name: data[name]
                      for name in DailySummary._fields if name != 'date'}
            records.append(DailySummary(date=day, **values))
        # Throttle between requests only; there is nothing to wait for after
        # the final one.
        if offset < days - 1:
            time.sleep(pause_seconds)
    return records

In [40]:
# Fetch 100 consecutive daily summaries, beginning at start_date. Each day is a
# separate HTTP request and the function pauses between requests, so this cell
# takes a while to finish.
records = extract_weather_data(
    url=BASE_URL,
    api_key=API_KEY,
    start_date=start_date,
    days=100,
)

In [41]:
# Tabulate the downloaded records, one row per day, then key the table by date.
# NOTE(review): values are stored exactly as the API returned them; confirm they
# are numeric before modelling.
df = pd.DataFrame(data=records, columns=features)
df = df.set_index(keys='date')
print(df)

           meantempm meandewptm meanpressurem maxhumidity minhumidity  \
date                                                                    
2017-12-26         2         -1          1027         100          78   
2017-12-27         2          0          1025          89          76   
2017-12-28         7          6          1018         100          83   
2017-12-29         9          8          1011         100          77   
2017-12-30         7          4          1023          97          74   
2017-12-31         4          1          1025         100          65   
2018-01-01         3         -1          1029          96          65   
2018-01-02         4          1          1027          86          65   
2018-01-03         4          0          1022          92          56   
2018-01-04         6          4          1016         100          80   
2018-01-05         9          8          1016         100          86   
2018-01-06         8          6          1023      

In [50]:
forecast_out = 30 # will forecast 30 days of precipitation data
# Feature matrix: every column of df except the target 'precipm'.
# The axis is passed by keyword because pandas 2.0 no longer accepts it
# positionally in DataFrame.drop.
X = np.array(df.drop(['precipm'], axis=1))
# Inputs for the forecast: the last `forecast_out` rows of X.
# NOTE(review): X is also used as the training data in the next cell, so these
# rows overlap what the model is fitted and scored on -- confirm this is intended.
X_forecast = X[-forecast_out:]

In [52]:
# Features: the full feature matrix built from df (no rows are held back here).
X_train = X
# Target: the 'precipm' column, kept two-dimensional with shape (n_rows, 1).
# Selecting it by name gives the same array as dropping the other eleven
# columns by position, but keeps working if columns are added or reordered.
Y_train = np.array(df[['precipm']])

In [53]:
# Hold out 20% of the rows for evaluation. The fixed random_state makes the
# shuffle, and therefore the score printed below, repeatable from run to run.
# NOTE(review): this rebinds X_train / Y_train, so re-running this cell on its
# own splits the already-reduced training set again.
# NOTE(review): sklearn.cross_validation was removed in scikit-learn 0.20;
# newer releases provide train_test_split in sklearn.model_selection.
X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(
    X_train, Y_train, test_size=0.2, random_state=42)

In [54]:
# Fit a linear regression on the training split.
lr = LinearRegression()
lr.fit(X=X_train, y=Y_train)

# Score the fitted model on the held-out split. For scikit-learn regressors
# score() is the coefficient of determination (R^2), not a classification
# accuracy, despite the variable name.
accuracy = lr.score(X=X_test, y=Y_test)
print(accuracy)

0.14029436974714093


In [55]:
# Run the fitted model on the rows held in X_forecast and show the raw
# predictions, one value per row.
forecast_prediction = lr.predict(X=X_forecast)
print(forecast_prediction)

[[-2.79473036]
 [ 5.72692529]
 [11.01499611]
 [ 6.75279788]
 [-2.68418721]
 [-7.77657824]
 [ 2.55842141]
 [ 4.55546529]
 [ 3.21316968]
 [ 1.19828637]
 [ 1.50827371]
 [ 0.3448677 ]
 [-0.46378419]
 [-3.33588575]
 [ 1.86009759]
 [ 8.84442958]
 [ 9.87559686]
 [-0.40817175]
 [-0.04085008]
 [ 1.17448689]
 [ 3.70572274]
 [ 1.2056841 ]
 [-2.9074685 ]
 [-0.05523014]
 [ 0.52028615]
 [-0.50700506]
 [ 5.82136161]
 [ 0.23972006]
 [-1.86759332]
 [ 7.2866685 ]]
