In [6]:
import pandas as pd
import os

def combine_csv_files(directory):
    """
    Reads multiple CSV files from a directory, adds a 'y' column with specified values, and combines them into a single DataFrame.
    """
    print(len(os.listdir(directory)))

    combined_df = pd.DataFrame()
    csv_files = [f for f in os.listdir(directory) if f.endswith('.csv')]

    for i, file in enumerate(csv_files):
        label = file.split('_')[0]
        print(label)
        file_path = os.path.join(directory, file)
        try:
            df = pd.read_csv(file_path)
            df['y'] = label
            combined_df = pd.concat([combined_df, df], ignore_index=True)
        except Exception as e:
            print(f"Error reading or processing file {file}: {e}")
            return None

    return combined_df


directory_path = "raw_data"

os.makedirs(directory_path, exist_ok=True)

combined_data = combine_csv_files(directory_path)

print(combined_data.head())

6
zero
half
gun
fist
unknown
paper
   Channel_1  Channel_2  Channel_3  Channel_4  Channel_5  Channel_6  \
0        819        562        266        181        350        643   
1        730        544        265        185        369        646   
2        775        564        294        186        378        679   
3        763        508        289        203        371        611   
4        719        473        305        198        356        731   

   Channel_7  Channel_8     y  
0        936        519  zero  
1       1001        486  zero  
2       1025        475  zero  
3       1006        478  zero  
4       1034        429  zero  


In [7]:
combined_data

Unnamed: 0,Channel_1,Channel_2,Channel_3,Channel_4,Channel_5,Channel_6,Channel_7,Channel_8,y
0,819,562,266,181,350,643,936,519,zero
1,730,544,265,185,369,646,1001,486,zero
2,775,564,294,186,378,679,1025,475,zero
3,763,508,289,203,371,611,1006,478,zero
4,719,473,305,198,356,731,1034,429,zero
...,...,...,...,...,...,...,...,...,...
2980,261,33,35,41,53,232,264,134,paper
2981,262,36,35,41,56,210,263,131,paper
2982,258,36,33,41,52,180,230,126,paper
2983,326,43,36,39,65,178,226,179,paper


In [8]:
if combined_data is not None:
    grouped_data = combined_data.groupby('y').size().reset_index(name='count')
grouped_data


Unnamed: 0,y,count
0,fist,497
1,gun,496
2,half,498
3,paper,500
4,unknown,497
5,zero,497


In [10]:
combined_data['Channel_1'] = (combined_data['Channel_1']-combined_data['Channel_1'].min())/(combined_data['Channel_1'].max()-combined_data['Channel_1'].min())

In [12]:
combined_data['Channel_2'] = (combined_data['Channel_2']-combined_data['Channel_2'].min())/(combined_data['Channel_2'].max()-combined_data['Channel_2'].min())
combined_data['Channel_3'] = (combined_data['Channel_3']-combined_data['Channel_3'].min())/(combined_data['Channel_3'].max()-combined_data['Channel_3'].min())
combined_data['Channel_4'] = (combined_data['Channel_4']-combined_data['Channel_4'].min())/(combined_data['Channel_4'].max()-combined_data['Channel_4'].min())
combined_data['Channel_5'] = (combined_data['Channel_5']-combined_data['Channel_5'].min())/(combined_data['Channel_5'].max()-combined_data['Channel_5'].min())
combined_data['Channel_6'] = (combined_data['Channel_6']-combined_data['Channel_6'].min())/(combined_data['Channel_6'].max()-combined_data['Channel_6'].min())
combined_data['Channel_7'] = (combined_data['Channel_7']-combined_data['Channel_7'].min())/(combined_data['Channel_7'].max()-combined_data['Channel_7'].min())
combined_data['Channel_8'] = (combined_data['Channel_8']-combined_data['Channel_8'].min())/(combined_data['Channel_8'].max()-combined_data['Channel_8'].min())

In [13]:
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

X = combined_data.drop('y', axis=1)
y = combined_data['y']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

svm_model = SVC(kernel='linear', C=1)
svm_model.fit(X_train, y_train)

y_pred = svm_model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

Accuracy: 0.8337053571428571


In [14]:
from sklearn.model_selection import GridSearchCV

param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': [1, 0.1, 0.01, 0.001],
    'kernel': ['rbf', 'linear']
}


grid = GridSearchCV(SVC(), param_grid, refit=True, verbose=2, cv=5)

grid.fit(X_train, y_train)

print("Best Parameters: ", grid.best_params_)
print("Best Estimator: ", grid.best_estimator_)

Fitting 5 folds for each of 32 candidates, totalling 160 fits
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.1s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.1s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.1s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.1s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.1s
[CV] END ......................C=0.1, gamma=1, kernel=linear; total time=   0.0s
[CV] END ......................C=0.1, gamma=1, kernel=linear; total time=   0.0s
[CV] END ......................C=0.1, gamma=1, kernel=linear; total time=   0.0s
[CV] END ......................C=0.1, gamma=1, kernel=linear; total time=   0.0s
[CV] END ......................C=0.1, gamma=1, kernel=linear; total time=   0.0s
[CV] END .......................C=0.1, gamma=0.1, kernel=rbf; total time=   0.1s
[CV] END .......................C=0.1, gamma=0.

In [15]:
from sklearn.metrics import classification_report

svm_model = SVC(C=100, gamma=1, kernel='rbf')
svm_model.fit(X_train, y_train)

svm_predictions = svm_model.predict(X_test)

print(classification_report(y_test, svm_predictions))

              precision    recall  f1-score   support

        fist       0.99      1.00      1.00       147
         gun       0.99      1.00      0.99       144
        half       0.95      0.95      0.95       151
       paper       0.96      0.96      0.96       140
     unknown       0.99      0.99      0.99       154
        zero       0.99      0.97      0.98       160

    accuracy                           0.98       896
   macro avg       0.98      0.98      0.98       896
weighted avg       0.98      0.98      0.98       896

