In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA

In [2]:
# 1. Charger le dataset
df = pd.read_csv("datascience_salaries.csv")

In [3]:
# 2. Normaliser la colonne 'salary' entre 0 et 1
scaler = MinMaxScaler()
df['salary_normalized'] = scaler.fit_transform(df[['salary']])

In [4]:
# 3. Réduction de dimensions avec PCA (appliquée aux données numériques uniquement)
numeric_df = df.select_dtypes(include=['float64', 'int64']).drop(columns=['salary'])
pca = PCA(n_components=2)
pca_result = pca.fit_transform(numeric_df)
df[['PCA1', 'PCA2']] = pca_result

In [5]:
# 4. Regrouper par 'experience_level' et calculer la moyenne et la médiane
grouped_salary_stats = df.groupby('experience_level')['salary'].agg(['mean', 'median']).reset_index()

In [6]:
print(df)

      Unnamed: 0       job_title    job_type experience_level       location  \
0              0  Data scientist   Full Time           Senior  New York City   
1              2  Data scientist   Full Time           Senior         Boston   
2              3  Data scientist   Full Time           Senior         London   
3              4  Data scientist   Full Time           Senior         Boston   
4              5  Data scientist   Full Time           Senior  New York City   
...          ...             ...         ...              ...            ...   
1166        2243          ML Ops   Full Time           Senior        Toronto   
1167        2249          ML Ops   Full Time           Senior         Boston   
1168        2250          ML Ops   Full Time           Senior          Delhi   
1169        2255          ML Ops   Full Time           Senior  San Francisco   
1170        2259          ML Ops  Internship           Senior  New York City   

     salary_currency  salary  salary_no

In [7]:
print(df[['experience_level', 'salary', 'salary_normalized', 'PCA1', 'PCA2']].head())

  experience_level  salary  salary_normalized        PCA1      PCA2
0           Senior  149000           0.601010 -931.620836  0.427553
1           Senior  120000           0.454545 -929.620836  0.281083
2           Senior   68000           0.191919 -928.620837  0.018454
3           Senior  120000           0.454545 -927.620836  0.281077
4           Senior  149000           0.601010 -926.620836  0.427539


In [8]:
print(grouped_salary_stats)

  experience_level          mean   median
0            Entry  36111.111111  30000.0
1        Executive  76076.923077  46000.0
2              Mid  51786.885246  51000.0
3           Senior  75088.033012  68000.0
