In [2]:
import os
import warnings
from pathlib import Path

import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.svm import SVR
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides any warnings raised by the estimators fitted
# below — confirm that suppressing them is intended.
warnings.filterwarnings("ignore")

In [166]:
# Folder holding train.csv and test.csv. The default is the path the notebook
# was originally written against; set the SALES_DATA_DIR environment variable
# to run it on another machine without editing this cell.
DATA_DIR = Path(
    os.environ.get("SALES_DATA_DIR", "/Users/sahithipriya/Downloads/sales-forecasting")
)

# df carries the labelled rows (it has a "Sales" column); df_test does not.
df = pd.read_csv(DATA_DIR / "train.csv")
df_test = pd.read_csv(DATA_DIR / "test.csv")

In [168]:
# Preview the first rows of the unlabelled test frame.
df_test.head()

Unnamed: 0,ID,Company,Quarter,QuickRatio,InventoryRatio,RevenueGrowth,MarketshareChange,Bond rating,Stock rating,Region,Industry
0,7,CMP01,Q8,1.93,2.79,-0.03,-0.01,CCC,Buy,South,Metal Fabrication
1,8,CMP01,Q9,1.93,4.77,0.0,0.0,CCC,Buy,South,Metal Fabrication
2,16,CMP02,Q8,1.97,2.34,0.04,-0.03,A,Sell,West,Infrastructure
3,17,CMP02,Q9,1.93,4.14,0.04,0.01,BBB,Hold,West,Infrastructure
4,25,CMP03,Q8,0.67,,-0.05,-0.01,BB,Buy,East,Infrastructure


In [169]:
# Preview the first rows of the training frame, including the Sales target.
df.head()

Unnamed: 0,ID,Company,Quarter,QuickRatio,InventoryRatio,RevenueGrowth,MarketshareChange,Bond rating,Stock rating,Region,Industry,Sales
0,0,CMP01,Q1,2.02,7.71,0.05,-0.04,CCC,Buy,South,Metal Fabrication,1517.0
1,1,CMP01,Q2,2.01,4.1,0.03,0.0,CCC,Hold,South,Metal Fabrication,2968.0
2,2,CMP01,Q3,2.02,6.79,0.06,-0.02,CCC,Buy,South,Metal Fabrication,1497.0
3,3,CMP01,Q4,1.98,3.97,0.01,0.02,CCC,Buy,South,Metal Fabrication,2929.0
4,4,CMP01,Q5,1.96,7.41,-0.07,0.02,CCC,Buy,South,Metal Fabrication,1452.0


In [170]:
# Same preview of the test frame as shown earlier in the notebook.
# NOTE(review): duplicate of a previous cell; candidate for removal.
df_test.head()

Unnamed: 0,ID,Company,Quarter,QuickRatio,InventoryRatio,RevenueGrowth,MarketshareChange,Bond rating,Stock rating,Region,Industry
0,7,CMP01,Q8,1.93,2.79,-0.03,-0.01,CCC,Buy,South,Metal Fabrication
1,8,CMP01,Q9,1.93,4.77,0.0,0.0,CCC,Buy,South,Metal Fabrication
2,16,CMP02,Q8,1.97,2.34,0.04,-0.03,A,Sell,West,Infrastructure
3,17,CMP02,Q9,1.93,4.14,0.04,0.01,BBB,Hold,West,Infrastructure
4,25,CMP03,Q8,0.67,,-0.05,-0.01,BB,Buy,East,Infrastructure


In [171]:
# (rows, columns) of the test frame.
df_test.shape

(150, 11)

In [172]:
# Remove the ID and Quarter columns, then discard rows that have no Sales
# value to learn from.
# errors="ignore" plus a non-inplace dropna make this cell idempotent: running
# it a second time no longer raises KeyError on the already-removed columns.
df = (
    df
    .drop(columns=["ID", "Quarter"], errors="ignore")
    .dropna(subset=["Sales"])
)

In [173]:
# Feature preparation: mean-impute InventoryRatio and one-hot encode each
# categorical column.
#
# handle_unknown="ignore" makes the encoders emit an all-zero row for a
# category that was not present when they were fitted, instead of raising at
# predict time (e.g. a Company or Bond rating that only occurs in the held-out
# or test rows).
#
# NOTE(review): ColumnTransformer's default remainder is "drop", so every
# column not listed here (QuickRatio, RevenueGrowth, MarketshareChange in the
# frames previewed above) is discarded before the model sees it — confirm that
# this is intentional.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', SimpleImputer(strategy="mean"), ["InventoryRatio"]),
        ('BondRating', OneHotEncoder(handle_unknown="ignore"), ['Bond rating']),
        ('StockRating', OneHotEncoder(handle_unknown="ignore"), ['Stock rating']),
        ('Region', OneHotEncoder(handle_unknown="ignore"), ['Region']),
        ('Industry', OneHotEncoder(handle_unknown="ignore"), ['Industry']),
        ('Company', OneHotEncoder(handle_unknown="ignore"), ['Company']),
    ],
)

In [174]:
# Separate the target from the feature columns.
Y = df["Sales"]
X = df.drop("Sales", axis=1)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

In [175]:
# Fit the preprocessor on the training features and inspect the encoded result.
# Despite the name, the displayed value is a sparse matrix rather than a
# DataFrame (see the output below). This also fits the same `preprocessor`
# object that is later placed inside the model pipeline.
df_encoded = preprocessor.fit_transform(X_train)
df_encoded

<420x98 sparse matrix of type '<class 'numpy.float64'>'
	with 3668 stored elements in Compressed Sparse Row format>

In [176]:
# End-to-end estimator: the column preprocessing followed by a two-hidden-layer
# MLP regressor. Earlier experiments with LinearRegression and
# ExtraTreesRegressor were left here as commented-out code and have been removed.
#
# random_state pins the network's weight initialisation and batch shuffling so
# the reported errors and the submission file are reproducible across runs; 42
# matches the seed already used for the train/test split.
model = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('ann', MLPRegressor(
            hidden_layer_sizes=(115, 55),
            activation='relu',
            solver='adam',
            max_iter=1650,
            random_state=42,
        )),
    ]
)

In [177]:
# Fit the whole pipeline (preprocessing + regressor) on the training partition only.
model.fit(X_train, Y_train)

In [178]:
# Predict on both partitions so training error can be compared with hold-out error.
Y_train_pred, Y_test_pred = (
    model.predict(features) for features in (X_train, X_test)
)

In [179]:
# Mean absolute error on the training rows and on the held-out rows, in that order.
train_mae, test_mae = (
    mean_absolute_error(actual, predicted)
    for actual, predicted in ((Y_train, Y_train_pred), (Y_test, Y_test_pred))
)

print("Train MAE:", train_mae)
print("Test MAE:", test_mae)

Train MAE: 658.1367372688566
Test MAE: 868.0253360527662


In [180]:
# Refit the same pipeline on all labelled rows (train + hold-out) before
# predicting the unlabelled test set. The MAE figures printed above were
# measured on the earlier fit, not on this one.
model.fit(X,Y)

In [181]:
# Keep the test IDs aside; the ID column is dropped from df_test in the next
# cell, and the IDs are needed again when writing the submission file.
idDF_test = df_test["ID"]

In [182]:
# Remove ID and Quarter from the test frame, mirroring what was done to the
# training frame. errors="ignore" keeps the cell idempotent: re-running it
# after the columns are gone no longer raises KeyError.
df_test = df_test.drop(columns=["ID", "Quarter"], errors="ignore")
df_test.head()

Unnamed: 0,Company,QuickRatio,InventoryRatio,RevenueGrowth,MarketshareChange,Bond rating,Stock rating,Region,Industry
0,CMP01,1.93,2.79,-0.03,-0.01,CCC,Buy,South,Metal Fabrication
1,CMP01,1.93,4.77,0.0,0.0,CCC,Buy,South,Metal Fabrication
2,CMP02,1.97,2.34,0.04,-0.03,A,Sell,West,Infrastructure
3,CMP02,1.93,4.14,0.04,0.01,BBB,Hold,West,Infrastructure
4,CMP03,0.67,,-0.05,-0.01,BB,Buy,East,Infrastructure


In [183]:
# Asks for up to 200 rows; df_test.shape reported 150 rows earlier, so this
# displays the entire frame.
# NOTE(review): largely repeats the preview in the previous cell.
df_test.head(200)

Unnamed: 0,Company,QuickRatio,InventoryRatio,RevenueGrowth,MarketshareChange,Bond rating,Stock rating,Region,Industry
0,CMP01,1.93,2.79,-0.03,-0.01,CCC,Buy,South,Metal Fabrication
1,CMP01,1.93,4.77,0.00,0.00,CCC,Buy,South,Metal Fabrication
2,CMP02,1.97,2.34,0.04,-0.03,A,Sell,West,Infrastructure
3,CMP02,1.93,4.14,0.04,0.01,BBB,Hold,West,Infrastructure
4,CMP03,0.67,,-0.05,-0.01,BB,Buy,East,Infrastructure
...,...,...,...,...,...,...,...,...,...
145,CMP73,0.93,,0.01,0.01,BBB,Buy,West,Automobile
146,CMP74,1.30,6.18,0.00,-0.02,AA,Buy,South,Metal Fabrication
147,CMP74,1.28,4.96,0.04,0.01,AA,Buy,South,Metal Fabrication
148,CMP75,2.21,5.43,0.04,0.01,BBB,Hold,West,Automobile


In [184]:
# Predict Sales for every row of the test frame using the pipeline refit on all labelled data.
final = model.predict(df_test)

In [185]:
# Sanity check: show what kind of object predict() returned before wrapping it in a DataFrame.
print(type(final))

<class 'numpy.ndarray'>


In [186]:
# Wrap the predicted values in a one-column frame named "Sales".
predictions = pd.DataFrame({"Sales": final})

In [187]:
# Build the submission: one row per test ID with its predicted Sales.
# pd.concat(axis=1) aligns on index labels, not on position, so both pieces
# are reset to a fresh 0..n-1 index first; otherwise any filtering or
# re-indexing of the test frame would silently produce NaN-padded rows.
assert len(idDF_test) == len(predictions), "ID and prediction counts differ"
result = pd.concat(
    [idDF_test.reset_index(drop=True), predictions.reset_index(drop=True)],
    axis=1,
)
result.to_csv("submission31.csv", index=False)