# ## 1. Installation des dépendances

# ## 2. Entraînement du modèle (version notebook)

In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import LabelEncoder, RobustScaler
from sklearn.metrics import mean_absolute_error, r2_score
import plotly.express as px
import plotly.graph_objects as go
import dis

In [3]:
df = pd.read_csv('ndtv_data_final.csv')
df.head(100).T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
Name,OnePlus 7T Pro McLaren Edition,Realme X2 Pro,iPhone 11 Pro Max,iPhone 11,LG G8X ThinQ,OnePlus 7T,OnePlus 7T Pro,Samsung Galaxy Note 10+,Asus ROG Phone 2,Xiaomi Redmi K20 Pro,...,Samsung Galaxy M40,Redmi Note 7S,Motorola One Vision,Nubia Red Magic 3,Oppo A9,Google Pixel 3a XL,Samsung Galaxy A70,Black Shark 2,OnePlus 7,LG G8s ThinQ
Brand,OnePlus,Realme,Apple,Apple,LG,OnePlus,OnePlus,Samsung,Asus,Xiaomi,...,Samsung,Xiaomi,Motorola,Nubia,Oppo,Google,Samsung,Black Shark,OnePlus,LG
Model,7T Pro McLaren Edition,X2 Pro,iPhone 11 Pro Max,iPhone 11,G8X ThinQ,7T,7T Pro,Galaxy Note 10+,ROG Phone 2,Redmi K20 Pro,...,Galaxy M40,Redmi Note 7S,One Vision,Red Magic 3,A9,Pixel 3a XL,Galaxy A70,2,7,G8s ThinQ
Battery capacity (mAh),4085,4000,3969,3110,4000,3800,4085,4300,6000,4000,...,3500,4000,3500,5000,4020,3700,4500,4000,3700,3550
Screen size (inches),6.67,6.5,6.5,6.1,6.4,6.55,6.67,6.8,6.59,6.39,...,6.3,6.3,6.3,6.65,6.53,6.0,6.7,6.39,6.41,6.2
Touchscreen,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,...,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes
Resolution x,1440,1080,1242,828,1080,1080,1440,1440,1080,1080,...,1080,1080,1080,1080,1080,1080,1080,1080,1080,1080
Resolution y,3120,2400,2688,1792,2340,2400,3120,3040,2340,2340,...,2340,2340,2520,2340,2340,2160,2400,2340,2340,2248
Processor,8,8,6,6,8,8,8,8,8,8,...,8,8,8,8,8,8,8,8,8,8


In [3]:
# Génération de données synthétiques si vous n'avez pas le CSV
def generate_sample_data():
    brands = ['Apple', 'Samsung', 'Xiaomi', 'OnePlus', 'Oppo']
    processors = ['A15', 'Snapdragon 888', 'Exynos 2100', 'Dimensity 1200', 'Helio G95']
    
    np.random.seed(42)
    n_samples = 500
    
    data = {
        'Brand': np.random.choice(brands, n_samples),
        'Battery capacity (mAh)': np.random.randint(3000, 6000, n_samples),
        'Screen size (inches)': np.round(np.random.uniform(5.0, 7.0, n_samples), 1),
        'Processor': np.random.choice(processors, n_samples),
        'RAM (MB)': np.random.choice([4000, 6000, 8000, 12000], n_samples),
        'Internal storage (GB)': np.random.choice([64, 128, 256, 512], n_samples),
        'Rear camera': np.random.randint(12, 108, n_samples),
        'Front camera': np.random.randint(8, 32, n_samples),
    }
    
    # Prix simulé avec une formule réaliste
    data['Price'] = (
        (data['RAM (MB)'] * 0.002) +
        (data['Internal storage (GB)'] * 1.5) +
        (data['Rear camera'] * 5) +
        (data['Battery capacity (mAh)'] * 0.03) +
        (data['Screen size (inches)'] * 50) +
        np.where(data['Brand'] == 'Apple', 300, 
                np.where(data['Brand'] == 'Samsung', 150, 50))
    ) * (1 + np.random.normal(0, 0.1, n_samples))
    
    return pd.DataFrame(data)

In [4]:
# Entraînement complet
df = generate_sample_data()

# Feature engineering
df['Camera_Total'] = df['Rear camera'] + df['Front camera']
df['RAM_GB'] = df['RAM (MB)'] / 1000
df['Price_per_GB'] = np.where(df['Internal storage (GB)'] > 0,
                            df['Price'] / df['Internal storage (GB)'],
                            df['Price'].mean())
df['Is_Premium'] = df['Brand'].isin(['Apple', 'Samsung', 'OnePlus']).astype(int)
df['LogPrice'] = np.log1p(df['Price'])

features = ['Brand', 'Battery capacity (mAh)', 'Screen size (inches)', 'Processor',
            'RAM (MB)', 'Internal storage (GB)', 'Rear camera', 'Front camera',
            'Price_per_GB', 'Camera_Total', 'RAM_GB', 'Is_Premium']

X = df[features]
y = df['LogPrice']

# Encodage
brand_encoder = LabelEncoder()
processor_encoder = LabelEncoder()
X.loc[:, 'Brand'] = brand_encoder.fit_transform(X['Brand'])
X.loc[:, 'Processor'] = processor_encoder.fit_transform(X['Processor'])

# Normalisation
scaler = RobustScaler()
X_scaled = scaler.fit_transform(X)

# Entraînement
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_scaled, y)

# Evaluation
y_pred = np.expm1(model.predict(X_scaled))
r2 = r2_score(np.expm1(y), y_pred)
print(f"Modèle entraîné avec R² = {r2:.3f}")

Modèle entraîné avec R² = 0.995


# ## 3. Interface de prédiction interactive

In [12]:
from ipywidgets import interact, widgets
from IPython.display import display, HTML

# Fonction de prédiction
def predict_phone_price(brand, processor, battery, screen_size, ram, storage, rear_cam, front_cam):
    try:
        # Préparation des données
        input_data = pd.DataFrame({
            'Brand': [brand],
            'Processor': [processor],
            'Battery capacity (mAh)': [battery],
            'Screen size (inches)': [screen_size],
            'RAM (MB)': [ram * 1000],
            'Internal storage (GB)': [storage],
            'Rear camera': [rear_cam],
            'Front camera': [front_cam],
            'Price_per_GB': [df['Price'].mean() / storage],
            'Camera_Total': [rear_cam + front_cam],
            'RAM_GB': [ram],
            'Is_Premium': [int(brand in ['Apple', 'Samsung', 'OnePlus'])]
        })
        
        # Transformation
        input_data['Brand'] = brand_encoder.transform(input_data['Brand'])
        input_data['Processor'] = processor_encoder.transform(input_data['Processor'])
        input_scaled = scaler.transform(input_data[features])
        
        # Prédiction
        predicted_price = np.expm1(model.predict(input_scaled)[0])
        similar_phones = df[
            (df['Internal storage (GB)'].between(storage * 0.8, storage * 1.2)) &
            (df['RAM (MB)'].between(ram * 800, ram * 1200))
        ]
        avg_price = similar_phones['Price'].mean() if not similar_phones.empty else predicted_price
        
        # Affichage
        display(HTML(f"""
        <style>
        .prediction-box {{
            border: 2px solid #1f77b4;
            border-radius: 10px;
            padding: 20px;
            margin: 10px 0;
            background-color: #f0f8ff;
        }}
        .price {{
            font-size: 24px;
            font-weight: bold;
            color: #1f77b4;
        }}
        </style>
        <div class="prediction-box">
            <h3>Résultats de prédiction</h3>
            <div class="price">Prix estimé: ${predicted_price:,.2f}</div>
            <p>Prix moyen pour téléphones similaires: ${avg_price:,.2f}</p>
            <p>Différence: {((predicted_price - avg_price)/avg_price*100):.1f}%</p>
        </div>
        """))
        
        # Visualisations
        fig = go.Figure()
        fig.add_trace(go.Bar(
            x=['Votre téléphone', 'Moyenne similaire'],
            y=[predicted_price, avg_price],
            marker_color=['#1f77b4', '#ff7f0e']
        ))
        fig.update_layout(title='Comparaison de prix', yaxis_title='Prix (USD)')
        fig.show()
        
    except Exception as e:
        display(HTML(f"<div style='color:red;'>Erreur: {str(e)}</div>"))

# Interface interactive
interact(
    predict_phone_price,
    brand=widgets.Dropdown(options=df['Brand'].unique(), description='Marque:'),
    processor=widgets.Dropdown(options=df['Processor'].unique(), description='Processeur:'),
    battery=widgets.IntSlider(min=1000, max=6000, step=100, value=4000, description='Batterie (mAh):'),
    screen_size=widgets.FloatSlider(min=4.0, max=7.0, step=0.1, value=6.0, description='Écran (pouces):'),
    ram=widgets.IntSlider(min=2, max=12, step=2, value=4, description='RAM (GB):'),
    storage=widgets.IntSlider(min=32, max=512, step=32, value=64, description='Stockage (GB):'),
    rear_cam=widgets.IntSlider(min=8, max=108, step=4, value=48, description='Caméra arrière (MP):'),
    front_cam=widgets.IntSlider(min=2, max=32, step=2, value=12, description='Caméra avant (MP):')
)

interactive(children=(Dropdown(description='Marque:', options=('OnePlus', 'Oppo', 'Xiaomi', 'Samsung', 'Apple'…

<function __main__.predict_phone_price(brand, processor, battery, screen_size, ram, storage, rear_cam, front_cam)>

# ## 4. Visualisations complémentaires

In [13]:
# Radar plot des caractéristiques
def show_radar_plot(brand, battery, screen_size, ram, storage, rear_cam, front_cam):
    fig = go.Figure()
    
    # Votre téléphone
    fig.add_trace(go.Scatterpolar(
        r=[battery/5000, screen_size/7, ram/8, storage/256, rear_cam/64, front_cam/32],
        theta=['Batterie', 'Écran', 'RAM', 'Stockage', 'Cam. Arrière', 'Cam. Avant'],
        fill='toself',
        name='Votre téléphone'
    ))
    
    # Moyenne de la marque
    brand_mean = df[df['Brand'] == brand].mean(numeric_only=True)
    fig.add_trace(go.Scatterpolar(
        r=[
            brand_mean['Battery capacity (mAh)']/5000,
            brand_mean['Screen size (inches)']/7,
            brand_mean['RAM (MB)']/8000,
            brand_mean['Internal storage (GB)']/256,
            brand_mean['Rear camera']/64,
            brand_mean['Front camera']/32
        ],
        theta=['Batterie', 'Écran', 'RAM', 'Stockage', 'Cam. Arrière', 'Cam. Avant'],
        fill='toself',
        name=f'Moyenne {brand}'
    ))
    
    fig.update_layout(
        polar=dict(radialaxis=dict(visible=True, range=[0, 1.2])),
        title='Comparaison des caractéristiques',
        template='plotly_white'
    )
    fig.show()

# Exemple d'utilisation
show_radar_plot('Samsung', 4500, 6.5, 6, 128, 50, 16)

# ## 5. Analyse des données

In [14]:
# Heatmap des corrélations
corr = df.select_dtypes(include=np.number).corr()
fig = px.imshow(corr, text_auto=True, aspect="auto", color_continuous_scale='RdBu_r')
fig.update_layout(title='Corrélations entre caractéristiques')
fig.show()

# Distribution des prix
fig = px.histogram(df, x='Price', nbins=30, title='Distribution des prix')
fig.show()