## Combing all the csv files

In [1]:
import pandas as pd
import glob

# Path to the directory containing the CSV files
csv_directory = 'csv_data/*.csv'

# Use glob to get a list of all CSV files in the directory
csv_files = glob.glob(csv_directory)

# Initialize an empty list to store DataFrames
dfs = []

# Iterate through the CSV files and read them into DataFrames
for csv_file in csv_files:
    df = pd.read_csv(csv_file)
    dfs.append(df)

# Concatenate all DataFrames into one
combined_df = pd.concat(dfs, ignore_index=True)


In [2]:
combined_df.head()

Unnamed: 0,Employee ID,Frame,"NOSE (x, y)","LEFT_EYE (x, y)","RIGHT_EYE (x, y)","LEFT_EAR (x, y)","RIGHT_EAR (x, y)","LEFT_SHOULDER (x, y)","RIGHT_SHOULDER (x, y)","LEFT_ELBOW (x, y)",...,right_hip&right_knee - RIGHT_HIP - right_hip&left_hip,right_hip&right_knee - RIGHT_HIP - right_shoulder&right_hip,left_hip&left_knee - LEFT_KNEE - left_knee&left_ankle,left_hip&left_knee - LEFT_HIP - right_hip&left_hip,left_hip&left_knee - LEFT_HIP - left_shoulder&left_hip,right_shoulder&left_shoulder - RIGHT_SHOULDER - right_shoulder&right_hip,right_shoulder&left_shoulder - LEFT_SHOULDER - left_shoulder&left_hip,right_hip&left_hip - RIGHT_HIP - right_shoulder&right_hip,right_hip&left_hip - LEFT_HIP - left_shoulder&left_hip,Fatigue or not
0,1,1,"(1730.4343, 499.71573)","(1730.0718, 488.47086)","(1735.2671, 483.16837)","(1748.7151, 460.78976)","(1773.3285, 448.17874)","(1776.3436, 484.28323)","(1843.3269, 452.72836)","(1749.8611, 617.0585)",...,31.1,113.0,148.7,144.7,103.7,97.7,68.8,81.9,111.6,0
1,2,1,"(1590.8701, 589.2316)","(1597.0657, 580.8875)","(1591.0591, 577.44275)","(1578.7622, 566.05396)","(1571.85, 558.87067)","(1538.4595, 562.6858)","(1518.8684, 557.33167)","(1516.0548, 645.5139)",...,69.2,80.5,151.6,112.6,77.4,164.0,14.7,11.3,170.0,0
2,1,2,"(1730.4501, 499.4446)","(1729.9916, 488.30383)","(1735.4542, 482.63712)","(1747.4718, 461.22943)","(1773.4812, 447.74716)","(1773.2981, 485.01086)","(1844.315, 452.4399)","(1745.3423, 619.1491)",...,32.4,114.2,149.3,144.0,102.9,98.4,66.8,81.8,113.1,0
3,2,2,"(1590.149, 588.6149)","(1596.4396, 580.2695)","(1590.0571, 576.5344)","(1579.3652, 565.374)","(1570.4858, 557.154)","(1539.1362, 562.7054)","(1516.5193, 555.9392)","(1517.9889, 644.7959)",...,67.1,81.4,147.9,113.7,78.5,159.7,18.2,14.2,167.8,0
4,1,3,"(1731.1641, 501.01492)","(1730.6722, 489.73178)","(1736.074, 483.8995)","(1747.6708, 462.01212)","(1773.8024, 448.44772)","(1772.5045, 485.04547)","(1845.0645, 452.8699)","(1742.6753, 618.1172)",...,33.4,115.0,151.2,142.7,103.6,99.2,65.5,81.6,113.7,0


In [3]:
combined_df.shape

(7879, 36)

## Dropping unwanted columns

In [4]:
combined_df.drop(combined_df.columns[1:19], axis=1, inplace=True)

## Shuffling the rows

In [5]:
# Shuffle the rows
shuffled_df = combined_df.sample(frac=1, random_state=42)

# Display the shuffled DataFrame
shuffled_df.head()

Unnamed: 0,Employee ID,right_shoulder&right_elbow - RIGHT_ELBOW - right_elbow&right_wrist,right_shoulder&right_elbow - RIGHT_SHOULDER - right_shoulder&left_shoulder,right_shoulder&right_elbow - RIGHT_SHOULDER - right_shoulder&right_hip,left_shoulder&left_elbow - LEFT_ELBOW - left_elbow&left_wrist,left_shoulder&left_elbow - LEFT_SHOULDER - right_shoulder&left_shoulder,left_shoulder&left_elbow - LEFT_SHOULDER - left_shoulder&left_hip,right_hip&right_knee - RIGHT_KNEE - right_knee&right_ankle,right_hip&right_knee - RIGHT_HIP - right_hip&left_hip,right_hip&right_knee - RIGHT_HIP - right_shoulder&right_hip,left_hip&left_knee - LEFT_KNEE - left_knee&left_ankle,left_hip&left_knee - LEFT_HIP - right_hip&left_hip,left_hip&left_knee - LEFT_HIP - left_shoulder&left_hip,right_shoulder&left_shoulder - RIGHT_SHOULDER - right_shoulder&right_hip,right_shoulder&left_shoulder - LEFT_SHOULDER - left_shoulder&left_hip,right_hip&left_hip - RIGHT_HIP - right_shoulder&right_hip,right_hip&left_hip - LEFT_HIP - left_shoulder&left_hip,Fatigue or not
1402,1,155.6,100.1,18.2,177.9,94.4,36.5,161.0,99.0,157.4,168.9,95.4,139.2,118.3,57.9,58.4,125.4,0
1634,1,120.1,116.6,32.2,178.5,93.9,8.9,168.0,100.3,169.4,157.5,88.5,171.2,84.4,85.0,90.3,100.3,0
3184,1,143.6,75.6,39.1,179.1,129.2,73.3,87.7,7.8,67.6,97.5,179.8,50.2,114.7,55.9,59.8,129.6,1
4600,1,162.5,139.2,102.2,164.4,39.6,100.5,108.7,168.3,44.2,103.3,15.4,50.8,37.0,140.1,147.5,35.4,1
5164,2,171.1,81.4,39.8,151.6,122.0,11.9,146.5,92.6,127.3,146.4,86.2,130.6,41.6,133.9,140.1,44.4,1


In [6]:
import pandas as pd

# Assuming you have a DataFrame called 'df'
# Check for null values in each column
null_values = shuffled_df.isnull().sum()

# Display the count of null values for each column
print(null_values)

Employee ID                                                                   0
right_shoulder&right_elbow - RIGHT_ELBOW - right_elbow&right_wrist            0
right_shoulder&right_elbow - RIGHT_SHOULDER - right_shoulder&left_shoulder    0
right_shoulder&right_elbow - RIGHT_SHOULDER - right_shoulder&right_hip        0
left_shoulder&left_elbow - LEFT_ELBOW - left_elbow&left_wrist                 0
left_shoulder&left_elbow - LEFT_SHOULDER - right_shoulder&left_shoulder       0
left_shoulder&left_elbow - LEFT_SHOULDER - left_shoulder&left_hip             0
right_hip&right_knee - RIGHT_KNEE - right_knee&right_ankle                    2
right_hip&right_knee - RIGHT_HIP - right_hip&left_hip                         0
right_hip&right_knee - RIGHT_HIP - right_shoulder&right_hip                   0
left_hip&left_knee - LEFT_KNEE - left_knee&left_ankle                         1
left_hip&left_knee - LEFT_HIP - right_hip&left_hip                            0
left_hip&left_knee - LEFT_HIP - left_sho

## Train, Val and Test Split

In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Drop rows with null values
shuffled_df = shuffled_df.dropna()

# Define the features and target variable
X = shuffled_df.drop('Fatigue or not',axis=1)
y = shuffled_df['Fatigue or not']

# Split the data into training, validation, and test sets
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Display the shapes of the resulting sets
print("Training set shape:", X_train.shape, y_train.shape)
print("Validation set shape:", X_val.shape, y_val.shape)
print("Test set shape:", X_test.shape, y_test.shape)

Training set shape: (4725, 17) (4725,)
Validation set shape: (1575, 17) (1575,)
Test set shape: (1576, 17) (1576,)


In [8]:
# Count the occurrences of each unique value in y_test
print("Train data counts: ", y_train.value_counts(),"\n")
print("Val data counts: ", y_val.value_counts(),"\n")
print("Test data counts: ", y_test.value_counts(),"\n")

Train data counts:  Fatigue or not
1    3040
0    1685
Name: count, dtype: int64 

Val data counts:  Fatigue or not
1    1041
0     534
Name: count, dtype: int64 

Test data counts:  Fatigue or not
1    1008
0     568
Name: count, dtype: int64 



## Random Forest Classifier

In [9]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Create a Random Forest classifier
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model on the training data
rf_model.fit(X_train, y_train)

In [10]:
# Make predictions on the validation set
y_val_pred = rf_model.predict(X_val)

# Evaluate the model's performance on the validation set
validation_accuracy = accuracy_score(y_val, y_val_pred)
validation_report = classification_report(y_val, y_val_pred)

print(f"Validation Set Accuracy: {validation_accuracy}")
print("Validation Set Classification Report:\n", validation_report)

Validation Set Accuracy: 0.9612698412698413
Validation Set Classification Report:
               precision    recall  f1-score   support

           0       0.95      0.94      0.94       534
           1       0.97      0.97      0.97      1041

    accuracy                           0.96      1575
   macro avg       0.96      0.96      0.96      1575
weighted avg       0.96      0.96      0.96      1575



In [11]:
# Make predictions on the test set
y_test_pred = rf_model.predict(X_test)

# Evaluate the model's performance on the test set
test_accuracy = accuracy_score(y_test, y_test_pred)
test_report = classification_report(y_test, y_test_pred)

print(f"Test Set Accuracy: {test_accuracy}")
print("Test Set Classification Report:\n", test_report)


Test Set Accuracy: 0.9625634517766497
Test Set Classification Report:
               precision    recall  f1-score   support

           0       0.95      0.94      0.95       568
           1       0.97      0.97      0.97      1008

    accuracy                           0.96      1576
   macro avg       0.96      0.96      0.96      1576
weighted avg       0.96      0.96      0.96      1576



In [12]:
# Predict classes and probabilities for test set
test_predictions = rf_model.predict(X_test)
test_probabilities = rf_model.predict_proba(X_test)

In [13]:
# Concatenate X_test and y_test DataFrames
test_data = pd.concat([X_test, y_test], axis=1)
test_data.reset_index(inplace=True)
test_data.head()

Unnamed: 0,index,Employee ID,right_shoulder&right_elbow - RIGHT_ELBOW - right_elbow&right_wrist,right_shoulder&right_elbow - RIGHT_SHOULDER - right_shoulder&left_shoulder,right_shoulder&right_elbow - RIGHT_SHOULDER - right_shoulder&right_hip,left_shoulder&left_elbow - LEFT_ELBOW - left_elbow&left_wrist,left_shoulder&left_elbow - LEFT_SHOULDER - right_shoulder&left_shoulder,left_shoulder&left_elbow - LEFT_SHOULDER - left_shoulder&left_hip,right_hip&right_knee - RIGHT_KNEE - right_knee&right_ankle,right_hip&right_knee - RIGHT_HIP - right_hip&left_hip,right_hip&right_knee - RIGHT_HIP - right_shoulder&right_hip,left_hip&left_knee - LEFT_KNEE - left_knee&left_ankle,left_hip&left_knee - LEFT_HIP - right_hip&left_hip,left_hip&left_knee - LEFT_HIP - left_shoulder&left_hip,right_shoulder&left_shoulder - RIGHT_SHOULDER - right_shoulder&right_hip,right_shoulder&left_shoulder - LEFT_SHOULDER - left_shoulder&left_hip,right_hip&left_hip - RIGHT_HIP - right_shoulder&right_hip,right_hip&left_hip - LEFT_HIP - left_shoulder&left_hip,Fatigue or not
0,4981,1,145.8,168.6,66.9,149.5,9.6,68.1,119.7,96.2,80.7,117.8,84.3,80.7,101.7,77.7,15.5,165.1,1
1,3881,2,142.3,81.6,11.4,171.4,118.6,39.0,156.2,38.1,122.8,156.4,138.6,118.7,93.0,79.6,84.7,102.7,0
2,4601,2,179.4,117.0,79.6,148.0,106.4,28.6,179.8,79.0,140.2,178.5,96.8,143.6,37.4,135.0,140.8,46.8,1
3,1119,2,147.5,152.2,5.3,156.3,23.3,2.4,165.6,174.6,173.8,165.3,8.9,173.5,157.5,20.9,0.8,177.7,0
4,6704,2,129.4,135.4,2.1,148.2,58.2,12.5,138.0,99.9,134.5,139.2,81.4,132.2,133.4,45.7,34.6,146.4,1


In [14]:
test_prob_df = pd.DataFrame(test_probabilities, columns=['non_fatigue', 'fatigue'])
test_prob_df.head()

Unnamed: 0,non_fatigue,fatigue
0,0.05,0.95
1,0.89,0.11
2,0.04,0.96
3,0.99,0.01
4,0.05,0.95


In [15]:
test_pred_df = pd.DataFrame(test_predictions, columns=['prediction'])
test_pred_df

Unnamed: 0,prediction
0,1
1,0
2,1
3,0
4,1
...,...
1571,1
1572,1
1573,1
1574,1


In [16]:
test_data["predictions"]=test_pred_df['prediction']
test_data["fatigue_index(%)"] = round(test_prob_df["fatigue"]*100,2)
test_data.tail(10)

Unnamed: 0,index,Employee ID,right_shoulder&right_elbow - RIGHT_ELBOW - right_elbow&right_wrist,right_shoulder&right_elbow - RIGHT_SHOULDER - right_shoulder&left_shoulder,right_shoulder&right_elbow - RIGHT_SHOULDER - right_shoulder&right_hip,left_shoulder&left_elbow - LEFT_ELBOW - left_elbow&left_wrist,left_shoulder&left_elbow - LEFT_SHOULDER - right_shoulder&left_shoulder,left_shoulder&left_elbow - LEFT_SHOULDER - left_shoulder&left_hip,right_hip&right_knee - RIGHT_KNEE - right_knee&right_ankle,right_hip&right_knee - RIGHT_HIP - right_hip&left_hip,...,left_hip&left_knee - LEFT_KNEE - left_knee&left_ankle,left_hip&left_knee - LEFT_HIP - right_hip&left_hip,left_hip&left_knee - LEFT_HIP - left_shoulder&left_hip,right_shoulder&left_shoulder - RIGHT_SHOULDER - right_shoulder&right_hip,right_shoulder&left_shoulder - LEFT_SHOULDER - left_shoulder&left_hip,right_hip&left_hip - RIGHT_HIP - right_shoulder&right_hip,right_hip&left_hip - LEFT_HIP - left_shoulder&left_hip,Fatigue or not,predictions,fatigue_index(%)
1566,2138,1,130.7,28.9,24.2,127.0,157.0,19.5,100.0,101.9,...,115.2,94.4,106.1,4.7,176.6,167.0,11.7,0,0,4.0
1567,1920,1,160.3,115.5,25.2,148.9,76.4,4.8,147.5,122.8,...,151.4,50.1,152.2,90.3,81.2,86.4,102.1,0,0,20.0
1568,3137,2,149.6,88.7,10.0,171.9,110.9,17.5,177.4,72.7,...,176.5,104.7,170.2,78.7,93.3,102.7,85.2,1,1,51.0
1569,2717,2,171.8,29.6,39.3,166.9,134.5,25.9,161.7,125.0,...,166.8,51.8,121.3,69.0,108.7,4.6,173.0,1,1,74.0
1570,6907,1,161.6,39.6,34.0,160.0,131.3,44.5,132.8,81.6,...,143.2,113.0,128.0,5.6,175.7,163.7,15.0,1,1,99.0
1571,5983,1,142.6,126.4,40.4,159.7,105.2,27.2,174.2,92.7,...,165.3,95.9,157.7,86.0,78.0,89.5,106.4,1,1,89.0
1572,7072,2,132.2,106.8,10.8,151.3,74.9,14.8,137.2,80.9,...,123.4,112.6,114.9,117.5,60.1,49.9,132.5,1,1,99.0
1573,2999,1,107.8,170.3,110.8,178.4,164.2,119.3,114.1,152.5,...,89.7,3.4,85.4,79.0,76.4,115.8,88.8,1,1,91.0
1574,7274,2,153.7,104.9,9.0,167.2,70.3,7.5,147.6,90.9,...,135.9,99.1,139.0,113.9,62.8,61.3,121.9,1,1,92.0
1575,2404,1,154.6,135.4,78.4,138.3,69.9,42.6,112.5,173.1,...,110.4,13.6,77.8,57.0,112.5,126.3,64.2,1,1,100.0


In [17]:
test_data.drop(['index'],axis=1,inplace=True)
test_data.tail(10)

Unnamed: 0,Employee ID,right_shoulder&right_elbow - RIGHT_ELBOW - right_elbow&right_wrist,right_shoulder&right_elbow - RIGHT_SHOULDER - right_shoulder&left_shoulder,right_shoulder&right_elbow - RIGHT_SHOULDER - right_shoulder&right_hip,left_shoulder&left_elbow - LEFT_ELBOW - left_elbow&left_wrist,left_shoulder&left_elbow - LEFT_SHOULDER - right_shoulder&left_shoulder,left_shoulder&left_elbow - LEFT_SHOULDER - left_shoulder&left_hip,right_hip&right_knee - RIGHT_KNEE - right_knee&right_ankle,right_hip&right_knee - RIGHT_HIP - right_hip&left_hip,right_hip&right_knee - RIGHT_HIP - right_shoulder&right_hip,left_hip&left_knee - LEFT_KNEE - left_knee&left_ankle,left_hip&left_knee - LEFT_HIP - right_hip&left_hip,left_hip&left_knee - LEFT_HIP - left_shoulder&left_hip,right_shoulder&left_shoulder - RIGHT_SHOULDER - right_shoulder&right_hip,right_shoulder&left_shoulder - LEFT_SHOULDER - left_shoulder&left_hip,right_hip&left_hip - RIGHT_HIP - right_shoulder&right_hip,right_hip&left_hip - LEFT_HIP - left_shoulder&left_hip,Fatigue or not,predictions,fatigue_index(%)
1566,1,130.7,28.9,24.2,127.0,157.0,19.5,100.0,101.9,91.0,115.2,94.4,106.1,4.7,176.6,167.0,11.7,0,0,4.0
1567,1,160.3,115.5,25.2,148.9,76.4,4.8,147.5,122.8,150.7,151.4,50.1,152.2,90.3,81.2,86.4,102.1,0,0,20.0
1568,2,149.6,88.7,10.0,171.9,110.9,17.5,177.4,72.7,175.4,176.5,104.7,170.2,78.7,93.3,102.7,85.2,1,1,51.0
1569,2,171.8,29.6,39.3,166.9,134.5,25.9,161.7,125.0,120.4,166.8,51.8,121.3,69.0,108.7,4.6,173.0,1,1,74.0
1570,1,161.6,39.6,34.0,160.0,131.3,44.5,132.8,81.6,114.7,143.2,113.0,128.0,5.6,175.7,163.7,15.0,1,1,99.0
1571,1,142.6,126.4,40.4,159.7,105.2,27.2,174.2,92.7,177.8,165.3,95.9,157.7,86.0,78.0,89.5,106.4,1,1,89.0
1572,2,132.2,106.8,10.8,151.3,74.9,14.8,137.2,80.9,130.7,123.4,112.6,114.9,117.5,60.1,49.9,132.5,1,1,99.0
1573,1,107.8,170.3,110.8,178.4,164.2,119.3,114.1,152.5,36.7,89.7,3.4,85.4,79.0,76.4,115.8,88.8,1,1,91.0
1574,2,153.7,104.9,9.0,167.2,70.3,7.5,147.6,90.9,152.2,135.9,99.1,139.0,113.9,62.8,61.3,121.9,1,1,92.0
1575,1,154.6,135.4,78.4,138.3,69.9,42.6,112.5,173.1,60.6,110.4,13.6,77.8,57.0,112.5,126.3,64.2,1,1,100.0
