In [1]:
import pandas as pd  
import numpy as np  
import matplotlib.pyplot as plt  
import seaborn as seabornInstance 

from sklearn.model_selection import train_test_split 
from sklearn.linear_model import LinearRegression
from sklearn import metrics
from sklearn.metrics import r2_score

from ipywidgets import interact

from scipy import stats

%matplotlib inline

def run_linear_regression(data, features, target):
    """Fit a linear regression on an 80/20 train/test split of ``data``.

    Args:
        data: DataFrame holding the feature and target columns.
        features: Either a list of column names (several predictors) or a
            single column name (one predictor).
        target: Name of the column to predict.

    Returns:
        Tuple ``(regressor, X_test, y_test)``: the fitted model and the
        held-out 20% of the rows (split with ``random_state=0``).
    """
    predictors = data[features].values
    response = data[target].values
    if not isinstance(features, list):
        # A single column name selects 1-D arrays; turn both the predictor
        # and the target into column vectors before fitting.
        predictors = predictors.reshape(-1, 1)
        response = response.reshape(-1, 1)

    split = train_test_split(predictors, response, test_size=0.2, random_state=0)
    X_train, X_test, y_train, y_test = split

    model = LinearRegression()
    model.fit(X_train, y_train)
    return model, X_test, y_test

def assess_regression_result(regressor, y_test, y_pred):
    """Print the fitted parameters and hold-out error metrics of a regression.

    Prints, in order: a ``MONTH <p + 1> PREDICTION`` header (``p`` being the
    number of features), the intercept, the coefficients, MAE, MSE, RMSE,
    R-squared and adjusted R-squared.

    Args:
        regressor: Fitted model exposing ``coef_`` and ``intercept_``.
        y_test: Actual target values of the held-out rows.
        y_pred: Predictions for the same rows as ``y_test``.
    """
    n = len(y_test)
    # coef_ may be 1-D (n_features,) or 2-D (n_targets, n_features); the last
    # axis is the feature count in both layouts, whereas len() would return
    # the number of targets for the 2-D layout.
    p = np.atleast_2d(regressor.coef_).shape[-1]
    print('MONTH ' + str(p+1) + ' PREDICTION')
    print('Intercept:', regressor.intercept_)
    print('Coefficients:', regressor.coef_)
    print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))
    # Compute the MSE once and reuse it for the RMSE line.
    mse = metrics.mean_squared_error(y_test, y_pred)
    print('Mean Squared Error:', mse)
    print('Root Mean Squared Error:', np.sqrt(mse))
    r2 = r2_score(y_test, y_pred)
    print('R-squared:', r2)
    # Adjusted R-squared needs n - p - 1 > 0 residual degrees of freedom;
    # report NaN instead of dividing by zero (or by a negative count) when
    # the hold-out set is too small for the number of features.
    residual_dof = n - p - 1
    if residual_dof > 0:
        adjusted_r2 = 1-((1-r2)*(n-1)/residual_dof)
    else:
        adjusted_r2 = float('nan')
    print('Adjusted R-squared:', adjusted_r2)

def plot_actual_vs_predicted(df):
    """Show ``df`` as a 16x10 bar chart with styled major and minor grids.

    Args:
        df: DataFrame whose columns are plotted as bars, one group per row.
    """
    df.plot(kind='bar', figsize=(16, 10))

    # (which, linestyle, color) for each grid layer; both share a line width.
    grid_layers = (
        ('major', '-', 'green'),
        ('minor', ':', 'black'),
    )
    for layer, style, colour in grid_layers:
        plt.grid(which=layer, linestyle=style, linewidth='0.5', color=colour)

    plt.show()

def forecast_month_2(month1_actual, regressor):
    """Forecast month 2 from the actual month-1 figure.

    Args:
        month1_actual: Month-1 value (scalar or array-like); it is reshaped
            into a single-column 2-D array before prediction.
        regressor: Fitted model exposing ``predict``.

    Returns:
        Tuple ``(month1_array, month2_forecast)``: the column-vector input
        passed to ``predict`` and the first predicted value as a scalar.
    """
    month1_array = np.array(month1_actual).reshape(-1, 1)
    month2_array = regressor.predict(month1_array)
    # predict() yields shape (n, 1) when the model was fit on a column-vector
    # target and (n,) when fit on a 1-D target; flattening first makes the
    # scalar extraction work for both instead of raising IndexError on 1-D.
    month2_forecast = np.asarray(month2_array).reshape(-1)[0]
    return month1_array, month2_forecast

def forecast_next_month(previous_months_array, this_month_forecast, next_month_regressor):
    """Extend the known-months row with the latest forecast and predict the next month.

    Args:
        previous_months_array: Values of all months before the current one.
        this_month_forecast: Forecast for the current month, appended to the
            previous values.
        next_month_regressor: Fitted model exposing ``predict``.

    Returns:
        Tuple ``(up_to_this_month_array, next_month_forecast)``: the single-row
        2-D feature array that was fed to the model, and the first element of
        the model's prediction.
    """
    latest_value = np.array(this_month_forecast)
    # np.append flattens its inputs, so the result is rebuilt as one row.
    feature_row = np.append(previous_months_array, latest_value).reshape(1, -1)
    predictions = next_month_regressor.predict(feature_row)
    return feature_row, predictions[0]

def _fit_monthly_models(data, months):
    """Fit one regression per target month, each using all earlier months as features.

    Returns a list of ``(regressor, y_test, y_pred)`` tuples ordered by target
    month: the first entry predicts ``month2`` from ``months[0]``, the last
    predicts the month after ``months[-1]`` from every column in ``months``.
    """
    fitted = []
    for known in range(1, len(months) + 1):
        # The first model is given a bare column name rather than a
        # one-element list, so run_linear_regression takes its single-feature
        # branch (column-vector X and y), exactly as the original call did.
        features = months[0] if known == 1 else months[0:known]
        target = 'month' + str(known + 1)
        regressor, X_test, y_test = run_linear_regression(data, features, target)
        fitted.append((regressor, y_test, regressor.predict(X_test)))
    return fitted


def _chain_forecasts(month1, regressors):
    """Roll the forecast forward: each model predicts the next month from month 1 plus all earlier forecasts."""
    known_months, forecast = forecast_month_2(month1, regressors[0])
    forecasts = [forecast]
    for regressor in regressors[1:]:
        known_months, forecast = forecast_next_month(known_months, forecast, regressor)
        forecasts.append(forecast)
    return forecasts


def forecast_sales_curve(month1, horizon, outcome, assessModel):
    """Plot a 12-month sales curve forecast seeded by a month-1 value.

    The CSV at ``outcome`` is loaded, rows whose ``month1`` lies more than
    three standard deviations from the mean are dropped, and eleven linear
    regressions are fitted (``month2`` from ``month1``, ``month3`` from
    ``month1``-``month2``, ... ``month12`` from ``month1``-``month11``).
    The forecasts are chained: each model is fed ``month1`` plus the
    forecasts of all preceding months.

    Args:
        month1: Month-1 value the forecast starts from.
        horizon: Number of months (counted from month 1) shown on the plot.
        outcome: Path of the CSV passed to ``pd.read_csv``; it must provide
            columns ``month1`` .. ``month12``. The y-axis label is chosen
            from this file name.
        assessModel: When truthy, print the metrics of every fitted model and
            plot actual vs. predicted values for up to 25 held-out rows each.
    """
    # NOTE: this sets a process-wide pandas display option.
    pd.options.display.float_format = '{:.1f}'.format
    data = pd.read_csv(outcome)
    # Drop rows whose month1 is more than 3 standard deviations from the mean.
    data = data[np.abs(data.month1-data.month1.mean()) <= (3*data.month1.std())]

    months = ['month' + str(number) for number in range(1, 12)]

    # One (regressor, y_test, y_pred) entry per target month, month2..month12.
    fitted = _fit_monthly_models(data, months)
    forecasts = _chain_forecasts(month1, [regressor for regressor, _, _ in fitted])

    x_axis = np.arange(1, len(forecasts) + 2)
    y_axis = np.array([np.around(value, 1) for value in [month1] + forecasts])

    plt.rcParams["figure.figsize"] = (10,10)
    plt.axis([0, 13, 0, 5100])
    plt.plot(x_axis[0:horizon], y_axis[0:horizon])
    plt.title('Sales Curve Forecast up to Month ' + str(horizon))
    plt.xlabel('Month')
    if outcome == "net_units_months_1_to_12_digital_album_fromJul2016toJan2019.csv":
        plt.ylabel('Net Units')
    elif outcome == "net_value_eur_months_1_to_12_digital_album_fromJul2016toJan2019.csv":
        plt.ylabel('Net Value (EUR)')
    else:
        plt.ylabel('ERROR')
    plt.xticks(np.arange(min(x_axis[0:horizon]), max(x_axis[0:horizon])+1, 1))
    # Label every plotted point with its (rounded) value.
    for xy in zip(x_axis[0:horizon], y_axis[0:horizon]):
        plt.annotate('%s' % xy[1], xy=xy, textcoords='data')
    plt.show()

    if assessModel:
        # Report each model in target-month order (month2 first).
        for regressor, y_test, y_pred in fitted:
            assess_regression_result(regressor, y_test, y_pred)
            comparison = pd.DataFrame({'Actual': y_test.flatten(), 'Predicted': y_pred.flatten()})
            plot_actual_vs_predicted(comparison.head(25))

# Wire forecast_sales_curve to interactive controls. Per the ipywidgets
# abbreviations, the (min, max, step) tuples become sliders, the dict becomes
# a dropdown mapping labels to CSV file names, and the boolean a checkbox.
# The trailing semicolon suppresses the cell's output echo in the notebook.
interact(
    forecast_sales_curve,
    month1=(200, 5000, 100),
    horizon=(2, 12, 1),
    outcome={
        'Net Units': "net_units_months_1_to_12_digital_album_fromJul2016toJan2019.csv",
        'Net Value (EUR)': "net_value_eur_months_1_to_12_digital_album_fromJul2016toJan2019.csv",
    },
    assessModel=False,
);