In [4]:
# Import library 
import pandas as pd 

In [5]:
# Load dataset 
performance_df = pd.read_csv("Performance_Statistics.csv") 

In [6]:
# Preview first few rows of dataset 
print("Preview of dataset:")  
print(performance_df.head()) 

Preview of dataset:
   index   Date Time of Day  Placed Mental State  Eliminations  Assists  \
0      0  04/10       18:00      27        sober             2        0   
1      1  04/10       18:00      45        sober             1        2   
2      2  04/10       18:00      38         high             3        0   
3      3  04/10       19:00      30         high             1        3   
4      4  04/10       19:00      16         high             3        1   

   Revives  Accuracy  Hits  Head Shots  Distance Traveled  Materials Gathered  \
0        0      0.23    14           2             271.08                  20   
1        0      0.30    19           1             396.73                 123   
2        0      0.30    32           1             607.80                  71   
3        0      0.18    19           1             714.16                 244   
4        1      0.58    42          18            1140.00                 584   

   Materials Used  Damage Taken  Damage to

In [7]:
# DATA CLEANING 
# Standardise column names 
performance_df.columns = performance_df.columns.str.lower() 
# Confirm column name update 
print("Updated column names:")
print(performance_df.columns) 

Updated column names:
Index(['index', 'date', 'time of day', 'placed', 'mental state',
       'eliminations', 'assists', 'revives', 'accuracy', 'hits', 'head shots',
       'distance traveled', 'materials gathered', 'materials used',
       'damage taken', 'damage to players', 'damage to structures'],
      dtype='object')


In [8]:
# Check for missing values 
print("Missing values per column:")  
print(performance_df.isnull().sum()) 

Missing values per column:
index                   0
date                    0
time of day             0
placed                  0
mental state            0
eliminations            0
assists                 0
revives                 0
accuracy                0
hits                    0
head shots              0
distance traveled       0
materials gathered      0
materials used          0
damage taken            0
damage to players       0
damage to structures    0
dtype: int64


In [9]:
# Drop duplicate rows 
performance_df.drop_duplicates(inplace=True) 

In [10]:
# Check for impossible values 
print("Eliminations > 100:")  
print(performance_df[performance_df["eliminations"] > 100]) 
print("Placement < 1 (invalid placement):")  
print(performance_df[performance_df["placed"] < 1]) 

Eliminations > 100:
Empty DataFrame
Columns: [index, date, time of day, placed, mental state, eliminations, assists, revives, accuracy, hits, head shots, distance traveled, materials gathered, materials used, damage taken, damage to players, damage to structures]
Index: []
Placement < 1 (invalid placement):
Empty DataFrame
Columns: [index, date, time of day, placed, mental state, eliminations, assists, revives, accuracy, hits, head shots, distance traveled, materials gathered, materials used, damage taken, damage to players, damage to structures]
Index: []


In [11]:
# DATA TRANSFORMATION AND ANALYSIS 
# Calculate headshot accuracy (%) = headshots / hits * 100 
performance_df["headshot_accuracy"] = (performance_df["head shots"] / performance_df["hits"] * 100).round(0).astype(int) 

# Replace any missing or invalid values with 0 (including division by zero) 
performance_df["headshot_accuracy"] = performance_df["headshot_accuracy"].fillna(0) 

# Print first few rows to confirm the new column is added 
print("Updated dataset with 'headshot_accuracy' column:")  
print(performance_df.head()) 

Updated dataset with 'headshot_accuracy' column:
   index   date time of day  placed mental state  eliminations  assists  \
0      0  04/10       18:00      27        sober             2        0   
1      1  04/10       18:00      45        sober             1        2   
2      2  04/10       18:00      38         high             3        0   
3      3  04/10       19:00      30         high             1        3   
4      4  04/10       19:00      16         high             3        1   

   revives  accuracy  hits  head shots  distance traveled  materials gathered  \
0        0      0.23    14           2             271.08                  20   
1        0      0.30    19           1             396.73                 123   
2        0      0.30    32           1             607.80                  71   
3        0      0.18    19           1             714.16                 244   
4        1      0.58    42          18            1140.00                 584   

   materials 

In [12]:
# Inspect data to choose elimination category thresholds 
print("Minimum eliminations:", performance_df["eliminations"].min())  
print("Maximum eliminations:", performance_df["eliminations"].max())  
print("Average eliminations:", round(performance_df["eliminations"].mean(), 2)) 

Minimum eliminations: 0
Maximum eliminations: 8
Average eliminations: 2.52


In [13]:
# Categorise players’ ability based on their number of eliminations/kills 
def categorise_elims(x):  
    if x <= 2:  
        return "Low"  
    elif x <= 5:  
        return "Medium"  
    else:  
        return "High" 
performance_df["elimination_category"] = performance_df["eliminations"].apply(categorise_elims) 

In [14]:
# Count of players in each elimination category 
print(performance_df["elimination_category"].value_counts()) 

# Proportion (%) of each category 
print(performance_df["elimination_category"].value_counts(normalize=True) * 100)

elimination_category
Low       51
Medium    27
High       9
Name: count, dtype: int64
elimination_category
Low       58.620690
Medium    31.034483
High      10.344828
Name: proportion, dtype: float64


In [15]:
# Average eliminations and headshot accuracy per elimination category 
print("Average stats per elimination category:")  
print(performance_df.groupby("elimination_category")[["eliminations", "headshot_accuracy"]].mean().round(2)) 

Average stats per elimination category:
                      eliminations  headshot_accuracy
elimination_category                                 
High                          6.56              15.11
Low                           1.24              17.31
Medium                        3.59              16.44


In [16]:
# Average eliminations per time of day 
print("Average eliminations by time of day:") 
print(performance_df.groupby("time of day")["eliminations"].mean().round(2))

Average eliminations by time of day:
time of day
01:00 PM    1.00
02:00 PM    2.00
10:00 PM    1.00
11:00 PM    3.00
12:00       2.50
12:00 AM    6.00
13:00       2.25
14:00       3.06
15:00       2.33
16:00       1.80
17:00       1.33
18:00       2.43
19:00       2.40
20:00       2.64
21:00       2.25
22:00       3.00
23:00       2.67
Name: eliminations, dtype: float64


In [17]:
# Average eliminations per mental state 
print("Average eliminations by mental state:") 
print(performance_df.groupby("mental state")["eliminations"].mean().round(2)) 

Average eliminations by mental state:
mental state
high     2.50
sober    2.53
Name: eliminations, dtype: float64


In [18]:
# EXPORT CLEANED DATA 
performance_df.to_csv("fortnite_cleaned_project.csv", index=False)  
print("Saved cleaned dataset as 'fortnite_cleaned_project.csv'") 

Saved cleaned dataset as 'fortnite_cleaned_project.csv'
