In [36]:
import warnings
import numpy as np
import pandas as pd
import opendatasets as od

# Visuals 
import seaborn as sns
import matplotlib.pyplot as plt

# Metrics
from scipy.stats import skew
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.metrics import confusion_matrix, plot_roc_curve, plot_precision_recall_curve
from sklearn.model_selection import learning_curve

# Preparation
from sklearn.decomposition import PCA
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# ML models
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
warnings.filterwarnings("ignore")
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV 

In [123]:
df_train = pd.read_csv("train.csv") # Shape (42000, 785)
df_test_1 = pd.read_csv("test.csv") # Shape (28000, 784)
df_train_1 = df_train.drop(columns=['label'],axis = 1) # Shape (42000, 784)
df_train_label = df_train['label']
df_train_1[df_train_1 < 141] = 0
df_train_1[df_train_1 > 140] = 1

In [124]:
sums = []

# Loop through the columns in steps of 4
for i in range(0, len(df_train_1.columns), 4):
    # Sum up each set of 4 columns
    col_sum = df_train_1.iloc[:, i:i+4].sum(axis=1)
    # Append the sum to the list
    sums.append(col_sum)

# Create a new dataframe from the sums list
df2 = pd.concat(sums, axis=1)


In [125]:
df2[df2 > 0] = 1

In [126]:
line1 = df_train_1.iloc[:, :28]
line2 = df_train_1.iloc[:, 28:56]
line3 = df_train_1.iloc[:, 56:84]
line4 = df_train_1.iloc[:, 84:112]
line5 = df_train_1.iloc[:, 112:140]
line6 = df_train_1.iloc[:, 140:168]
line7 = df_train_1.iloc[:, 168:196]
line8 = df_train_1.iloc[:, 196:224]
line9 = df_train_1.iloc[:, 224:252]
line10 = df_train_1.iloc[:, 252:280]
line11 = df_train_1.iloc[:, 280:308]
line12 = df_train_1.iloc[:, 308:336]
line13 = df_train_1.iloc[:, 336:364]
line14 = df_train_1.iloc[:, 364:392]
line15 = df_train_1.iloc[:, 392:420]
line16 = df_train_1.iloc[:, 420:448]
line17 = df_train_1.iloc[:, 448:476]
line18 = df_train_1.iloc[:, 476:504]
line19 = df_train_1.iloc[:, 504:532]
line20 = df_train_1.iloc[:, 532:560]
line21 = df_train_1.iloc[:, 560:588]
line22 = df_train_1.iloc[:, 588:616]
line23 = df_train_1.iloc[:, 616:644]
line24 = df_train_1.iloc[:, 644:672]
line25 = df_train_1.iloc[:, 672:700]
line26 = df_train_1.iloc[:, 700:728]
line27 = df_train_1.iloc[:, 728:756]
line28 = df_train_1.iloc[:, 756:784]

In [127]:
df_list = (line1, line2, line3, line4, line5, line6, line7, line8, line9, line10, line11, line12, line13, line14, line15, line16, line17, line18, line19, line20, line21, line22, line23, line24, line25, line26, line27, line28)
for df_e in df_list:
    df_e.insert(loc=0, column='corr', value=5)
    df_e.insert(loc=29, column='corr2', value=6)

In [128]:
def merge_numeric(row):
    concatenated = ''.join(map(str, row))
    return int(concatenated)

# Apply the function to each row of the dataframe
line1 = line1.apply(merge_numeric, axis=1)
line2 = line2.apply(merge_numeric, axis=1)
line3 = line3.apply(merge_numeric, axis=1)
line4 = line4.apply(merge_numeric, axis=1)
line5 = line5.apply(merge_numeric, axis=1)
line6 = line6.apply(merge_numeric, axis=1)
line7 = line7.apply(merge_numeric, axis=1)
line8 = line8.apply(merge_numeric, axis=1)
line9 = line9.apply(merge_numeric, axis=1)
line10 = line10.apply(merge_numeric, axis=1)
line11 = line11.apply(merge_numeric, axis=1)
line12 = line12.apply(merge_numeric, axis=1)
line13 = line13.apply(merge_numeric, axis=1)
line14 = line14.apply(merge_numeric, axis=1)
line15 = line15.apply(merge_numeric, axis=1)
line16 = line16.apply(merge_numeric, axis=1)
line17 = line17.apply(merge_numeric, axis=1)
line18 = line18.apply(merge_numeric, axis=1)
line19 = line19.apply(merge_numeric, axis=1)
line20 = line20.apply(merge_numeric, axis=1)
line21 = line21.apply(merge_numeric, axis=1)
line22 = line22.apply(merge_numeric, axis=1)
line23 = line23.apply(merge_numeric, axis=1)
line24 = line24.apply(merge_numeric, axis=1)
line25 = line25.apply(merge_numeric, axis=1)
line26 = line26.apply(merge_numeric, axis=1)
line27 = line27.apply(merge_numeric, axis=1)
line28 = line28.apply(merge_numeric, axis=1)

In [129]:
all_lines = pd.concat([line1, line2, line3, line4, line5, line6, line7, line8, line9, line10, line11, line12, line13, line14, line15, line16, line17, line18, line19, line20, line21, line22, line23, line24, line25, line26, line27, line28, ], axis=1)

In [130]:
all_lines.head(30)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,18,19,20,21,22,23,24,25,26,27
0,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000110000006,500000000000000000001110000006,500000000000000000001110000006,500000000000000000011100000006,500000000000000000111000000006,500000000000000001110000000006,...,500000000001110000000000000006,500000000011100000000000000006,500000000111000000000000000006,500000000111000000000000000006,500000000111000000000000000006,500000000110000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006
1,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000100000000000006,500000000001111111100000000006,500000000111111111110000000006,500000000111111111111000000006,500000001111110011111100000006,500000011111110000111110000006,...,500000011111000000011110000006,500000001111100000111110000006,500000000111111111111100000006,500000000011111111111000000006,500000000011111111110000000006,500000000000111111000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006
2,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000001000000000000006,500000000000001100000000000006,500000000000001100000000000006,500000000000001100000000000006,500000000000001110000000000006,500000000000001110000000000006,...,500000000000001110000000000006,500000000000011110000000000006,500000000000011110000000000006,500000000000011110000000000006,500000000000011110000000000006,500000000000001100000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006
3,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000011000000000000000000006,500000001000000000010000000006,500000001100000000010000000006,500000001100000000010000000006,500000000100000000010000000006,...,500000000001001111000100000006,500000000001111000000100000006,500000000000000000000100000006,500000000000000000000100000006,500000000000000000000100000006,500000000000000000000110000006,500000000000000000000100000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006
4,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000011111000000000006,500000000001111111100000000006,500000000111111111110000000006,500000000111111111111000000006,500000001111111111111100000006,500000001111000001111110000006,...,500000111110000000001111000006,500000111110000000011111000006,500000011111100011111110000006,500000001111111111111100000006,500000000111111111110000000006,500000000001111110000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006
5,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000001111000000000006,500000000000011111100000000006,500000000000011001110000000006,500000000000110001110000000006,500000000001100000011000000006,500000000001100000011000000006,...,500000000110000001100000000006,500000000110000011100000000006,500000000110000011000000000006,500000000011011110000000000006,500000000011111100000000000006,500000000000111000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006
6,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000001000000000011000000006,500000001111111111111000000006,500000000000000001111000000006,...,500000000000000110000000000006,500000000000000110000000000006,500000000000000110000000000006,500000000000000110000000000006,500000000000000110000000000006,500000000000001100000000000006,500000000000001100000000000006,500000000000001100000000000006,500000000000001100000000000006,500000000000000000000000000006
7,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000111110000000000006,500000000111111111111100000006,500000011110000010000110000006,500000000000000000000011000006,...,500000100000000000001100000006,500000100000000000000100000006,500000100000000000000110000006,500000100000000000000110000006,500000010000000000000100000006,500000001100000000011000000006,500000000111100011110000000006,500000000000111110000000000006,500000000000000000000000000006,500000000000000000000000000006
8,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000111111000000006,500000000000011111111111000006,500000000000111111111111100006,500000000001111000000000100006,500000000011110000000000000006,...,500000000000000001110000000006,500000000000000001110000000006,500000000000000011100000000006,500000000000000111000000000006,500000000000011110000000000006,500000000011111100000000000006,500000000011110000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006
9,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000111100000000000006,500000001111111111000000000006,500000001111111111110000000006,500000000100000001110000000006,500000000000000011110000000006,500000000000000011110000000006,...,500000000000000111111000000006,500000000000001111110000000006,500000011111111111100000000006,500000011111111110000000000006,500000001111111100000000000006,500000000110000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006


In [145]:
X.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,18,19,20,21,22,23,24,25,26,27
0,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000110000006,500000000000000000001110000006,500000000000000000001110000006,500000000000000000011100000006,500000000000000000111000000006,500000000000000001110000000006,...,500000000001110000000000000006,500000000011100000000000000006,500000000111000000000000000006,500000000111000000000000000006,500000000111000000000000000006,500000000110000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006
1,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000100000000000006,500000000001111111100000000006,500000000111111111110000000006,500000000111111111111000000006,500000001111110011111100000006,500000011111110000111110000006,...,500000011111000000011110000006,500000001111100000111110000006,500000000111111111111100000006,500000000011111111111000000006,500000000011111111110000000006,500000000000111111000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006
2,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000001000000000000006,500000000000001100000000000006,500000000000001100000000000006,500000000000001100000000000006,500000000000001110000000000006,500000000000001110000000000006,...,500000000000001110000000000006,500000000000011110000000000006,500000000000011110000000000006,500000000000011110000000000006,500000000000011110000000000006,500000000000001100000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006
3,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000011000000000000000000006,500000001000000000010000000006,500000001100000000010000000006,500000001100000000010000000006,500000000100000000010000000006,...,500000000001001111000100000006,500000000001111000000100000006,500000000000000000000100000006,500000000000000000000100000006,500000000000000000000100000006,500000000000000000000110000006,500000000000000000000100000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006
4,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000011111000000000006,500000000001111111100000000006,500000000111111111110000000006,500000000111111111111000000006,500000001111111111111100000006,500000001111000001111110000006,...,500000111110000000001111000006,500000111110000000011111000006,500000011111100011111110000006,500000001111111111111100000006,500000000111111111110000000006,500000000001111110000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006,500000000000000000000000000006


In [115]:
all_lines.to_csv('fuck50.csv', index=False)

In [146]:
X = pd.read_csv("fuck50")



FileNotFoundError: [Errno 2] No such file or directory: 'fuck50'

In [138]:
rf_model = RandomForestClassifier() # Random Forest
svc_model = SVC()

In [139]:
accuracy_models = pd.DataFrame(columns=['Model', 'Training Accuracy', 'Testing Accuracy', 'Mean Squared Error', 'Mean Absolute Error' ])

In [140]:
X = all_lines
y = df_train_label

In [141]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

In [142]:
rf_model.fit(X_train, y_train)
y_train_pred = rf_model.predict(X_train)
y_test_pred = rf_model.predict(X_test)
train_accuracy = accuracy_score(y_train, y_train_pred)
test_accuracy = accuracy_score(y_test, y_test_pred)
mse = mean_squared_error(y_test, y_test_pred)
mae = mean_absolute_error(y_test, y_test_pred)
accuracy_models = accuracy_models.append({'Model': 'Random Forest', 'Training Accuracy': train_accuracy, 'Testing Accuracy': test_accuracy, 'Mean Squared Error': mse, 'Mean Absolute Error': mae}, ignore_index=True)
confusion_rf_model = confusion_matrix(y_test, y_test_pred)

In [143]:
accuracy_models.head()

Unnamed: 0,Model,Training Accuracy,Testing Accuracy,Mean Squared Error,Mean Absolute Error
0,Random Forest,0.26122,0.233333,18.347976,3.271786


In [47]:
df2.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,186,187,188,189,190,191,192,193,194,195
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [31]:
sum_columns = []

for i in range(0, len(df2.columns) // 2):
    sum_column = df2.iloc[:, i] + df2.iloc[:, i + len(df2.columns) // 2]
    sum_columns.append(sum_column)

df3 = pd.concat(sum_columns, axis=1) 

In [45]:

merged_df.to_csv('fuck45.csv', index=False)
