In [1]:
#import dependencies
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import tree
import os

In [2]:
# Read the Minneapolis solar/weather CSV into a DataFrame.
# The path is relative to the notebook's working directory.
weather_csv_path = './resources/mpls_solar_weather.csv'
mpls = pd.read_csv(weather_csv_path)


In [3]:
# Collect the DataFrame's column labels into a plain Python list and
# display it as the cell output.
mpls_columns = mpls.columns.tolist()
mpls_columns

['date_time',
 'power_delivered',
 'energy_delivered',
 'cumulative_energy',
 'weather_description',
 'clouds_all',
 'temp_f',
 'pressure',
 'humidity',
 'wind_speed',
 'wind_deg',
 'rain_1h',
 'snow_1h',
 'weather_main',
 'hour',
 'day_of_year',
 'month',
 'sin_day',
 'cos_day',
 'sin_hour',
 'cos_hour',
 'sin_month',
 'cos_month']

In [4]:
# Names of the predictor columns, kept as a list for later use
# (e.g. pairing with the fitted model's feature importances).
feature_names = [
    'clouds_all',
    'temp_f',
    'pressure',
    'humidity',
    'wind_speed',
    'sin_day',
    'cos_day',
    'sin_hour',
    'cos_hour',
]
# Select just those columns, in that order, as the model inputs.
features = mpls[feature_names]

In [5]:
# Discretise power_delivered into 12 integer-labelled classes (0..11).

# 13 edges -> 12 intervals: a tiny (0, 0.0001] bin followed by
# 500-wide bins up to 5500.
power_bin_edges = [0, 0.0001] + list(range(500, 6000, 500))
labels = range(12)

# pd.cut uses right-closed intervals by default, so a reading of exactly 0
# is outside the first interval and comes back as NaN; fillna(0) maps it
# to class 0.
# NOTE(review): any other out-of-range or missing reading (negative,
# > 5500, NaN) is also mapped to class 0 by the same fillna -- confirm
# that is intended for this dataset.
mpls_power_bins = pd.cut(
    mpls['power_delivered'],
    bins=power_bin_edges,
    labels=labels,
).fillna(0)

In [6]:
from sklearn.model_selection import train_test_split

# Hold out part of the rows for evaluation. The fixed seed keeps the
# partition identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    mpls_power_bins,
    random_state=42,
)

In [7]:
# Fit a single decision tree on the training split and display its mean
# accuracy on the held-out split.
# random_state is pinned (same seed as the train/test split) because
# scikit-learn randomly permutes the candidate features at each split, so
# an unseeded tree can produce a different model and score on every run.
clf = tree.DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)
clf.score(X_test, y_test)

0.7500503524672709

In [8]:
from sklearn.ensemble import RandomForestClassifier

# Fit a 200-tree random forest on the training split and display its mean
# accuracy on the held-out split.
# random_state is pinned (same seed as the train/test split) because the
# forest bootstraps rows and samples features at random; without a seed
# the score and the feature importances read from `rf` change on every run.
rf = RandomForestClassifier(n_estimators=200, random_state=42)
rf.fit(X_train, y_train)
rf.score(X_test, y_test)

0.7915407854984894

In [9]:
# Pair each importance score with its feature name and rank the pairs
# from most to least important (tuples compare by score first).
ranked_importances = list(zip(rf.feature_importances_, feature_names))
ranked_importances.sort(reverse=True)
ranked_importances

[(0.3231512265699208, 'cos_hour'),
 (0.12253034041519882, 'temp_f'),
 (0.11162474894731204, 'cos_day'),
 (0.10652131985379498, 'sin_hour'),
 (0.09998501846727009, 'humidity'),
 (0.09139934243789226, 'sin_day'),
 (0.057501474244880606, 'pressure'),
 (0.04686578278694235, 'clouds_all'),
 (0.04042074627678795, 'wind_speed')]

In [10]:
# Show the raw importance scores as plain Python floats, in the forest's
# own order (one score per feature).
[float(score) for score in rf.feature_importances_]

[0.04686578278694235,
 0.12253034041519882,
 0.057501474244880606,
 0.09998501846727009,
 0.04042074627678795,
 0.09139934243789226,
 0.11162474894731204,
 0.10652131985379498,
 0.3231512265699208]