<a href="https://colab.research.google.com/github/kmikk/solar_machine_learning/blob/solar_branch/random_forest_solar_colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
#import dependencies
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import plotly.express as px
from sklearn import tree
import os

In [0]:
# Read the Minneapolis solar/weather CSV directly from the project's GitHub repository.
mpls = pd.read_csv(
    'https://raw.githubusercontent.com/kmikk/solar_machine_learning/master/resources/mpls_solar_weather.csv'
)


In [0]:
# Keep the column labels of the loaded frame as a plain Python list.
mpls_columns = mpls.columns.tolist()


In [0]:
# Select the predictor columns that are fed to the random forest model.
features = mpls.loc[:, [
    'clouds_all', 'temp_f', 'pressure', 'humidity', 'wind_speed',
    'sin_day', 'cos_day', 'sin_hour', 'cos_hour', 'rain_1h', 'dl_sec',
]]
# Remember the predictor names (in column order) for later use.
feature_names = features.columns.tolist()

In [0]:
# Discretise power_delivered into 12 integer-labelled classes (0..11).
# Edges are 0, 0.0001, then every 500 up to 5500; pd.cut's default intervals are
# right-closed and exclude the lowest edge, so an exact 0 is not placed in any bin.
labels = range(12)
mpls_power_bins = pd.cut(
    mpls['power_delivered'],
    bins=[0, 0.0001, *range(500, 6000, 500)],
    labels=labels,
).fillna(0)  # anything pd.cut left unbinned (exact zeros, out-of-range or missing values) becomes class 0

In [0]:
# Partition predictors and binned targets into train/test sets; the fixed seed
# makes the partition repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    mpls_power_bins,
    random_state=42,
)

In [29]:
# Train the random forest on the training split and report its mean accuracy
# on the held-out test split.
# random_state seeds the forest's internal randomness (bootstrap samples and
# per-split feature sub-sampling); without it the score below differs on every
# run even though the train/test split itself is seeded.
rf = RandomForestClassifier(n_estimators=200, random_state=42)
rf.fit(X_train, y_train)  # fit() trains in place and returns the same estimator
rf.score(X_test, y_test)

0.7947633434038268

In [0]:
# Pull the fitted model's per-feature importance scores out as plain Python floats.
feature_importances = [float(weight) for weight in rf.feature_importances_]

In [0]:
#create data frame 
df = pd.DataFrame(list(sorted(zip(feature_importances,feature_names),reverse=False)), 
               columns =['Parameter', 'Importance']) 

In [77]:
#create figure to look at importance of parameters
fig = px.bar(df, x='Parameter', y='Importance', orientation='h', color='Parameter', 
             color_continuous_scale='RdBu', labels={'Parameter':'Importance', 'Importance':'Parameter'},)
fig.show()