In [41]:
import pandas as pd

# Load the Steam games dataset and preview its first ten rows
df = pd.read_csv(filepath_or_buffer="./dataset/Steam_Games_2025.csv")
df.head(n=10)


Unnamed: 0,appid,name,release_date,required_age,price,dlc_count,metacritic_score,achievements,recommendations,categories,positive,negative,median_playtime_forever,peak_ccu,pct_pos_total,num_reviews_total
0,730,Counter-Strike 2,2012-08-21,0,0.0,1,0,1,4401572,"['Multi-player', 'Cross-Platform Multiplayer',...",7480813,1135108,5174,1212356,86,8632939
1,578080,PUBG: BATTLEGROUNDS,2017-12-21,0,0.0,0,0,37,1732007,"['Multi-player', 'PvP', 'Online PvP', 'Stats',...",1487960,1024436,0,616738,59,2513842
2,570,Dota 2,2013-07-09,0,0.0,2,90,0,14337,"['Multi-player', 'Co-op', 'Steam Trading Cards...",1998462,451338,898,555977,81,2452595
3,271590,Grand Theft Auto V Legacy,2015-04-13,17,0.0,0,96,77,1803063,"['Single-player', 'Multi-player', 'PvP', 'Onli...",1719950,250012,7101,117698,87,1803832
4,359550,Tom Clancy's Rainbow Six® Siege,2015-12-01,17,3.99,9,0,0,1165929,"['Single-player', 'Multi-player', 'PvP', 'Onli...",1152763,218446,2434,89916,84,1168020
5,440,Team Fortress 2,2007-10-10,0,0.0,1,92,520,41587,"['Multi-player', 'Cross-Platform Multiplayer',...",1025633,120619,0,50817,89,1146642
6,105600,Terraria,2011-05-16,0,9.99,2,83,115,1098792,"['Single-player', 'Multi-player', 'PvP', 'Onli...",1344773,34460,0,30516,97,1102434
7,252490,Rust,2018-02-08,0,39.99,4,69,92,992825,"['Multi-player', 'MMO', 'PvP', 'Online PvP', '...",1043708,152272,3675,200902,87,993856
8,4000,Garry's Mod,2006-11-29,0,5.99,0,0,29,984713,"['Single-player', 'Multi-player', 'PvP', 'Onli...",1106689,36727,1173,32384,96,985010
9,1172470,Apex Legends™,2020-11-04,0,0.0,0,88,12,1548,"['Multi-player', 'PvP', 'Online PvP', 'Co-op',...",660150,322363,667,151844,67,983230


## Prétraitement de données
### Transformation des données brutes en un format adapté pour l'analyse et la modélisation
Certaines colonnes du dataset contiennent des listes ou des chaînes de caractères complexes. Nous allons les transformer en valeurs numériques pour faciliter l'analyse.
1. **release_date** : Transformer en format POSIX (timestamp).
2. **categories** : Extraire 'single_player' et 'multi_player' et les convertir en variables binaires (1 si présent, 0 sinon).

## Suppression des colonnes inutiles
La colonne **appid** n'apporte pas d'information pertinente pour notre analyse. Nous allons la supprimer du dataset.

In [42]:
## Drop 'appid' from the dataset (guarded so the cell can be re-run safely)
if 'appid' in df.columns:
    df = df.drop(columns=['appid'])

## Convert 'release_date' to a POSIX timestamp in seconds (float).
## Subtracting the epoch and dividing by a one-second Timedelta does not depend
## on the datetime resolution pandas infers nor on the platform's default int
## width, and a missing date (NaT) becomes NaN instead of going through an
## integer cast that cannot represent it.
## Skipped when the column is already numeric, i.e. the conversion already ran;
## re-parsing the float seconds would reinterpret them and corrupt the column.
if not pd.api.types.is_numeric_dtype(df['release_date']):
    df['release_date'] = (
        pd.to_datetime(df['release_date']) - pd.Timestamp("1970-01-01")
    ) / pd.Timedelta(seconds=1)


## Extract 'single_player' and 'multi_player' from the 'categories' column

def extract_category(df: object, categories_name: str) -> int:
    """Return 1 if ``categories_name`` appears in one ``categories`` cell, else 0.

    Despite its name, ``df`` is a single cell value, not a DataFrame: this
    function is applied element-wise to the ``categories`` column.

    Args:
        df: One cell of the ``categories`` column. A string is searched for
            ``categories_name`` as a substring (the column holds the text
            form of a list); a list/tuple/set is tested for membership.
            Any other value (e.g. NaN or None for a missing cell) yields 0.
        categories_name: Category label to look for, e.g. ``'Single-player'``.
            An empty label always yields 0.

    Returns:
        1 when the category is present, 0 otherwise.
    """
    if not categories_name:
        return 0
    # Missing cells may reach this function as float NaN / None, which have no
    # string methods; treat anything that is not text or a collection as "absent".
    if not isinstance(df, (str, list, tuple, set, frozenset)):
        return 0
    return int(categories_name in df)
## Derive the binary 'single_player' / 'multi_player' flags from 'categories',
## then drop the raw column. Guarded (like the 'appid' drop) so that re-running
## the cell does not raise KeyError once 'categories' has been removed.
if 'categories' in df.columns:
    df['single_player'] = df['categories'].apply(extract_category, args=('Single-player',))
    df['multi_player'] = df['categories'].apply(extract_category, args=('Multi-player',))
    df = df.drop(columns=['categories'])
df.head(10)

Unnamed: 0,name,release_date,required_age,price,dlc_count,metacritic_score,achievements,recommendations,positive,negative,median_playtime_forever,peak_ccu,pct_pos_total,num_reviews_total,single_player,multi_player
0,Counter-Strike 2,1345507000.0,0,0.0,1,0,1,4401572,7480813,1135108,5174,1212356,86,8632939,0,1
1,PUBG: BATTLEGROUNDS,1513814000.0,0,0.0,0,0,37,1732007,1487960,1024436,0,616738,59,2513842,0,1
2,Dota 2,1373328000.0,0,0.0,2,90,0,14337,1998462,451338,898,555977,81,2452595,0,1
3,Grand Theft Auto V Legacy,1428883000.0,17,0.0,0,96,77,1803063,1719950,250012,7101,117698,87,1803832,1,1
4,Tom Clancy's Rainbow Six® Siege,1448928000.0,17,3.99,9,0,0,1165929,1152763,218446,2434,89916,84,1168020,1,1
5,Team Fortress 2,1191974000.0,0,0.0,1,92,520,41587,1025633,120619,0,50817,89,1146642,0,1
6,Terraria,1305504000.0,0,9.99,2,83,115,1098792,1344773,34460,0,30516,97,1102434,1,1
7,Rust,1518048000.0,0,39.99,4,69,92,992825,1043708,152272,3675,200902,87,993856,0,1
8,Garry's Mod,1164758000.0,0,5.99,0,0,29,984713,1106689,36727,1173,32384,96,985010,1,1
9,Apex Legends™,1604448000.0,0,0.0,0,88,12,1548,660150,322363,667,151844,67,983230,0,1
