# In [None]:
# Data Preparation and Processing: Loading Sample Data
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from scipy import stats

# Sample Data: five projects, with np.nan placed in three numeric columns so
# the cleaning steps below have something to act on.
data = {
    'Project Name': ['Project A', 'Project B', 'Project C', 'Project D', 'Project E'],
    'Budgeted Cost': [10000, 15000, np.nan, 20000, 25000],
    'Actual Cost': [12000, 14000, 21000, 24000, np.nan],
    'Start Date': ['2024-01-01', '2024-02-01', '2024-03-01', '2024-04-01', '2024-05-01'],
    'Team Size': [5, 6, 8, np.nan, 10]
}

df = pd.DataFrame(data)

# Handling Missing Data
# Assign the filled column back rather than calling fillna(inplace=True) on
# df[col]: the chained form operates on a temporary Series, is deprecated in
# pandas 2.x, and leaves df untouched when Copy-on-Write is enabled.
# Cost columns are imputed with the column mean (mean() skips NaN).
df['Budgeted Cost'] = df['Budgeted Cost'].fillna(df['Budgeted Cost'].mean())
df['Actual Cost'] = df['Actual Cost'].fillna(df['Actual Cost'].mean())
# Team Size is imputed with the first value returned by mode().
df['Team Size'] = df['Team Size'].fillna(df['Team Size'].mode()[0])

# Removing Duplicates (fully identical rows only)
df.drop_duplicates(inplace=True)

# Handling Outliers: Using Z-Score
# Absolute per-column z-scores of the two cost columns.
z_scores = np.abs(stats.zscore(df[['Budgeted Cost', 'Actual Cost']]))
# Keep only rows where every cost column has |z| < 3. The explicit .copy()
# makes df an independent frame, so later column assignments do not raise
# SettingWithCopyWarning.
df = df[(z_scores < 3).all(axis=1)].copy()

# Data Transformation: Standardization
# Fit a StandardScaler on the two cost columns, then overwrite those columns
# with their standardized values. The fitted scaler stays available as
# `scaler` for later use.
scaler = StandardScaler().fit(df[['Budgeted Cost', 'Actual Cost']])
df[['Budgeted Cost', 'Actual Cost']] = scaler.transform(df[['Budgeted Cost', 'Actual Cost']])

# Display the cleaned and transformed data (a bare expression is only rendered
# when this runs as the last statement of a notebook cell).
df
