In [1]:
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

# Load the athlete records. The columns listed below are expected to contain
# exactly the text labels used as keys in CATEGORY_ENCODINGS.
df = pd.read_csv('athlete_injury_risks.csv')

# Integer code assigned to each text label, per categorical column.
# NOTE(review): 'Environmental Factors' and 'Training Type' look nominal
# (no natural order); the integer codes impose an arbitrary ordering — confirm
# this is acceptable for the downstream scaler/PCA.
CATEGORY_ENCODINGS = {
    'Gender': {'Male': 0, 'Female': 1},
    'Previous Injuries': {'Yes': 1, 'No': 0},
    'Training Intensity': {'Low': 0, 'Medium': 1, 'High': 2},
    'Sleep Quality': {'Poor': 0, 'Fair': 1, 'Good': 2},
    'Movement Patterns': {'Normal': 0, 'Abnormal': 1},
    'Fatigue Level': {'Low': 0, 'Medium': 1, 'High': 2},
    'Environmental Factors': {'Sunny': 0, 'Rainy': 1, 'Snowy': 2, 'Humid': 3, 'Dry': 4},
    'Training Type': {'Running': 0, 'Weightlifting': 1, 'Football': 2, 'Cycling': 3},
}

for column, encoding in CATEGORY_ENCODINGS.items():
    # Series.map(dict) converts any label that is missing from the dict into
    # NaN without warning. Reject unexpected labels here, with a message that
    # names the column, instead of letting NaN flow into later steps.
    unexpected = set(df[column].dropna().unique()) - set(encoding)
    if unexpected:
        raise ValueError(
            f"Column {column!r} contains labels with no encoding: "
            f"{sorted(map(repr, unexpected))}"
        )
    df[column] = df[column].map(encoding)

# Separate the predictor columns from the label column.
X = df.drop('Injury Risk (Target)', axis=1)
y = df['Injury Risk (Target)']

# Split BEFORE fitting any preprocessing. Fitting the scaler/PCA on the full
# dataset would let the held-out rows influence the means, variances and
# principal axes used for training (data leakage).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the standardisation statistics from the training rows only.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_raw)

# Learn the 5-component projection from the (scaled) training rows only.
pca = PCA(n_components=5)
X_train = pca.fit_transform(X_train_scaled)

# The test rows are only transformed with the already-fitted objects.
# NOTE: X_test / y_test are held out but not scored anywhere in this cell.
X_test = pca.transform(scaler.transform(X_test_raw))

# Full-dataset views kept under their original names in case later cells use
# them; they are produced by the train-fitted transformers.
X_scaled = scaler.transform(X)
X_pca = pca.transform(X_scaled)

# random_state pins the forest's randomness so re-runs give the same model.
classifier = RandomForestClassifier(random_state=42)
classifier.fit(X_train, y_train)

# A single hand-written sample, already expressed in the integer codes used
# when the training columns were encoded (label shown next to each code).
unseen_data = pd.DataFrame([
    {
        'Gender': 0,                 # Male
        'Previous Injuries': 1,      # Yes
        'Training Intensity': 2,     # High
        'Sleep Quality': 2,          # Good
        'Movement Patterns': 0,      # Normal
        'Fatigue Level': 1,          # Medium
        'Environmental Factors': 3,  # Humid
        'Training Type': 1,          # Weightlifting
    }
])

# Push the sample through the same fitted steps as the training data:
# standardise -> project onto the principal components -> classify.
unseen_data_scaled = scaler.transform(unseen_data)
unseen_data_pca = pca.transform(unseen_data_scaled)
unseen_prediction = classifier.predict(unseen_data_pca)

print("Prediction for the unseen data:", unseen_prediction)


Prediction for the unseen data: [1]
