In [1]:
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.preprocessing import OneHotEncoder

In [2]:
METRICS = [
      keras.metrics.TruePositives(name='tp'),
      keras.metrics.FalsePositives(name='fp'),
      keras.metrics.TrueNegatives(name='tn'),
      keras.metrics.FalseNegatives(name='fn'), 
      keras.metrics.BinaryAccuracy(name='accuracy'),
      #keras.metrics.Precision(name='precision'),
      #keras.metrics.Recall(name='recall'),
      #keras.metrics.AUC(name='auc'),
      #keras.metrics.AUC(name='prc', curve='PR'), # precision-recall curve
]


In [3]:
df = pd.read_excel('C:/Users/rtribrunner/Asztal/Thesis Workspace/Aut market size.xlsx', sheet_name='Marketing dataset')

In [4]:
meret = df['Méret'].astype('category').cat.codes
multi = df['Multi'].astype('category').cat.codes
sector = df['Szektor'].astype('category').cat.codes
features = pd.concat([meret, multi, sector],axis=1)
labels = df['Termék'].astype('category').cat.codes
NO_SCENARIOS = 24
labels = labels[:-NO_SCENARIOS]
labels = to_categorical(labels, num_classes=3)


encoder = OneHotEncoder()
encoder.fit(features)
features = encoder.transform(features).toarray()

scenarios = features[-NO_SCENARIOS:, :]
features = features[:-NO_SCENARIOS, :]


X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.35, random_state=42)

In [5]:
marketing_model = tf.keras.Sequential([
      tf.keras.layers.Dense(64, activation='relu',input_shape=(12,)),
      tf.keras.layers.Dropout(0.5),
      tf.keras.layers.Dense(64, activation='relu'),
      tf.keras.layers.Dense(3, activation='softmax'),
])

marketing_model.compile(optimizer='adam',
      loss=keras.losses.CategoricalCrossentropy(),
      metrics=METRICS)

class_weights = { 0: 2, 1: 1, 2: 3.5 } # nagy súly - kis százalék

BATCH_SIZE = 1 

marketing_history = marketing_model.fit(
    X_train,
    y_train,
    batch_size=BATCH_SIZE,
    epochs=20,
    class_weight=class_weights
    )

loss, tp, fp, tn, fn, test_acc = marketing_model.evaluate(features, labels)
print('Test accuracy:', test_acc)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Test accuracy: 0.8285714387893677


In [6]:
prediction = marketing_model.predict(scenarios)

print("Predicted: "+ str(prediction))
#print("Forecasted: "+ str(y_test))

Predicted: [[0.06652851 0.8970501  0.03642135]
 [0.1706996  0.7855578  0.04374256]
 [0.07689264 0.7665632  0.15654422]
 [0.1194737  0.76081467 0.11971159]
 [0.05940019 0.9344119  0.00618799]
 [0.2993874  0.6917006  0.00891193]
 [0.06278621 0.89872426 0.03848941]
 [0.2511931  0.6960241  0.05278278]
 [0.330787   0.6422959  0.02691706]
 [0.73244816 0.25307778 0.01447404]
 [0.452632   0.4603463  0.08702166]
 [0.6092081  0.34026837 0.0505236 ]
 [0.33279046 0.64223933 0.02497022]
 [0.6842692  0.2987304  0.01700041]
 [0.48158    0.4112092  0.10721079]
 [0.6458505  0.2981964  0.05595313]
 [0.03406966 0.9549826  0.01094784]
 [0.12469428 0.86229044 0.01301522]
 [0.04675416 0.9230614  0.03018453]
 [0.08276985 0.88372964 0.03350055]
 [0.55766153 0.4136685  0.02866994]
 [0.68609977 0.29335496 0.02054523]
 [0.5253953  0.39324382 0.08136094]
 [0.51104206 0.42615324 0.06280467]]


In [8]:
output = pd.DataFrame(prediction)

output_file = "output.xlsx"

output.to_excel(output_file, index=False)

#Forrás: https://www.tensorflow.org/tutorials/structured_data/imbalanced_data#class_weights