# Dependencies

In [1]:
# Import Dependencies
# Pandas and numpy for data manipulation
import pandas as pd
import numpy as np

# Matplotlib visualization
import matplotlib.pyplot as plt
%matplotlib inline

# Splitting data into training and testing
from sklearn.model_selection import train_test_split

# Preprocessing library to encode data
from sklearn import preprocessing

# Machine Learning Model
import tensorflow
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical

In [2]:
# Import Train and Test Data
# Both files are referenced by bare file name, i.e. relative to the directory
# the kernel runs in. The saved output of this cell shows a FileNotFoundError
# for the test file, so make sure both CSVs are present before running on.
train_data = pd.read_csv("train_final_clean_df.csv")
test_data = pd.read_csv("test_final_clean_df.csv")

FileNotFoundError: [Errno 2] File b'test_final_clean_df.csv' does not exist: b'test_final_clean_df.csv'

In [None]:
# Preview the first two rows of the raw training frame
train_data.head(2)

In [None]:
# Preview the first two rows of the raw test frame
test_data.head(2)

# Encode label columns

In [None]:
# Work on copies so the frames loaded from CSV stay untouched.
train_encoded_data = train_data.copy()
test_encoded_data = test_data.copy()

# Replace Unseen items in test data.
# LabelEncoder.transform raises a ValueError for labels it was not fitted on,
# so these test-only values are mapped to a fallback value before encoding.
# NOTE(review): the fallbacks ('Unknown', 0) are presumed to occur in the
# training data -- confirm, otherwise the transform below still fails.
unseen_test_values = {
    'source_screen_name': (['People local', 'People global'], 'Unknown'),
    'genre_ids': ([1089, 166, 765, 1061, 303, 2045], 0),
}
for column, (unseen_values, fallback) in unseen_test_values.items():
    test_encoded_data[column] = test_encoded_data[column].replace(unseen_values, fallback)

# Select the categorical columns
categorical_columns = ['source_system_tab', 'source_screen_name', 'source_type', 'genre_ids', 'gender']

# One encoder per categorical column, stored in the same order as
# `categorical_columns` (le[k] belongs to categorical_columns[k]).
le = [preprocessing.LabelEncoder() for _ in categorical_columns]

for column, encoder in zip(categorical_columns, le):
    # Learn the label -> integer mapping on the training data only ...
    train_encoded_data[column] = encoder.fit_transform(train_encoded_data[column])
    # ... and reuse exactly that mapping for the test data.
    test_encoded_data[column] = encoder.transform(test_encoded_data[column])

In [None]:
# Keep the ten model input columns plus the label column, in a fixed order
train_columns = [
    "source_system_tab", "source_screen_name", "source_type",
    "song_length", "genre_ids", "language", "city", "age_group",
    "gender", "registered_via", "target",
]
train_encoded_data = train_encoded_data[train_columns]
train_encoded_data.head(2)

In [None]:
# Keep the ten model input columns; no label column is selected for the test frame
test_columns = [
    "source_system_tab", "source_screen_name", "source_type",
    "song_length", "genre_ids", "language", "city", "age_group",
    "gender", "registered_via",
]
test_encoded_data = test_encoded_data[test_columns]
test_encoded_data.head(2)

In [None]:
# Separate the label column from the model inputs
label_column = "target"
targets = pd.DataFrame(train_encoded_data[label_column])
features = train_encoded_data.drop(label_column, axis=1)

In [None]:
# Preview the first two rows of the model inputs
features.head(2)

In [None]:
# Preview the first two rows of the label frame
targets.head(2)

# Split Data into Training and Testing Datasets

In [None]:
# Hold out 30% of the rows for testing and keep the other 70% for training;
# random_state is fixed so the same split is produced on every run
X, X_test, y, y_test = train_test_split(
    features,
    targets,
    test_size=0.3,
    random_state=42,
)

# Report the shape of each part, one per line
for split_part in (X, X_test, y, y_test):
    print(split_part.shape)

# Scale data

In [None]:
# Build a min-max scaler with feature_range=(0, 1) and fit it on the
# training split only, so no statistics are taken from the held-out rows
scaler = preprocessing.MinMaxScaler(feature_range=(0, 1)).fit(X)

# Apply the scaling learned from the training split to both splits
X, X_test = scaler.transform(X), scaler.transform(X_test)

In [None]:
# Convert labels to one-hot-encoding.
# num_classes is pinned to 2 (the width of the softmax output layer) so that
# y and y_test always get the same number of columns; without it the width is
# inferred from the largest label found in each array separately.
y = to_categorical(y, num_classes=2)
y_test = to_categorical(y_test, num_classes=2)

# Apply Deep Learning Model

In [None]:
# Fully connected classifier: 10 inputs -> 100 -> 100 -> 2-way softmax output
model = Sequential([
    Dense(units=100, activation='relu', input_dim=10),
    Dense(units=100, activation='relu'),
    Dense(units=2, activation='softmax'),
])

In [None]:
# Configure training only (no fitting happens in this cell): Adam optimizer,
# categorical cross-entropy loss, and accuracy as the reported metric
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Train on the training split for 10 epochs with shuffling enabled;
# verbose=2 logs one line per epoch
model.fit(X, y, epochs=10, shuffle=True, verbose=2)

## Quantify our Trained Model

In [None]:
# Score the trained network on the held-out split; the result is unpacked
# into the loss and the accuracy metric configured at compile time
model_loss, model_accuracy = model.evaluate(x=X_test, y=y_test, verbose=2)
print(f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

## Make Predictions

In [None]:
# Predict the class of the first five held-out rows.
# Sequential.predict_classes was removed in TensorFlow 2.6; taking the argmax
# over the softmax probabilities returned by predict() yields the same indices.
encoded_predictions = np.argmax(model.predict(X_test[:5]), axis=-1)

In [None]:
# y_test is one-hot encoded, so decode it with argmax to show class indices
# that can be compared directly with the predicted classes
print(f"Predicted classes: {encoded_predictions}")
print(f"Actual Labels: {np.argmax(y_test[:5], axis=-1).tolist()}")