# 🧪 Test Notebook - Cinema Creuse

**Notebook interactif pour tester les dataframes et fonctions un par un**

---

## 📋 Sections

1. **Import & Config** - Charger les librairies
2. **df_movies** - Explorer le dataset IMDb
3. **Recommandations** - Tester les fonctions
4. **API TMDb** - Tester l'enrichissement
5. **Excel** - Tester les données B2B
6. **Utilisateurs** - Tester la gestion des profils
7. **Playground** - Zone de test libre

---

## 1️⃣ Import & Configuration

In [1]:
# Imports
import pandas as pd
import numpy as np
from pathlib import Path
import sys
import json

# Pandas display options: show every column, cap printed rows at 20,
# and let pandas size the output width automatically.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 20)
pd.set_option('display.width', None)

# Locate the project root. The historical assumption is that the notebook runs
# one level below it, so the parent of the working directory is tried first.
# When Jupyter was started from the project root itself, the working directory
# is used instead. If neither contains an "app" folder, fall back to the
# historical default (the parent directory) and warn below.
_cwd = Path.cwd()
PROJECT_ROOT = next(
    (candidate for candidate in (_cwd.parent, _cwd) if (candidate / "app").is_dir()),
    _cwd.parent,
)
APP_DIR = PROJECT_ROOT / "app"

# Keep app/ at the front of sys.path exactly once, even when the cell is
# re-run (a plain insert would add a duplicate entry on every execution).
_app_dir_str = str(APP_DIR)
sys.path[:] = [entry for entry in sys.path if entry != _app_dir_str]
sys.path.insert(0, _app_dir_str)

print("‚úÖ Imports OK")
print(f"üìÇ Project : {PROJECT_ROOT}")
print(f"üìÇ App : {APP_DIR}")

# Surface a wrong working directory here rather than as a later ImportError.
if not APP_DIR.is_dir():
    print(f"ATTENTION : dossier introuvable : {APP_DIR}")

‚úÖ Imports OK
üìÇ Project : c:\Users\paulc
üìÇ App : c:\Users\paulc\app


In [4]:
# Locations of the input files, all derived from the project root
DATA_DIR = PROJECT_ROOT / "data"
PARQUET_PATH = PROJECT_ROOT.joinpath("PARQUETS", "imdb_complet_avec_cast.parquet")
EXCEL_PATH = DATA_DIR.joinpath("Cinemas_existants_creuse.xlsx")

# Report, for each file, whether it exists and where it was looked up
for label, path in (("üìÑ Parquet", PARQUET_PATH), ("üìÑ Excel", EXCEL_PATH)):
    print(f"{label} : {path.exists()} - {path}")

üìÑ Parquet : False - c:\Users\paulc\PARQUETS\imdb_complet_avec_cast.parquet
üìÑ Excel : False - c:\Users\paulc\data\Cinemas_existants_creuse.xlsx


---

## 2️⃣ DataFrame IMDb (df_movies)

In [5]:
# Read the IMDb dataset from the Parquet file, then report its size
print("üîÑ Chargement...")
df_movies = pd.read_parquet(PARQUET_PATH)

n_rows, n_cols = df_movies.shape
print(f"‚úÖ {n_rows:,} films charg√©s")
print(f"üìä {n_rows:,} lignes x {n_cols} colonnes")

# deep=True also counts the payload of object columns (strings, lists)
size_mb = df_movies.memory_usage(deep=True).sum() / 1024**2
print(f"üíæ {size_mb:.1f} MB")

üîÑ Chargement...


FileNotFoundError: [Errno 2] No such file or directory: 'c:\\Users\\paulc\\PARQUETS\\imdb_complet_avec_cast.parquet'

In [None]:
# Preview: show the first rows of df_movies.
# Kept as the cell's last expression so the notebook renders the table.
df_movies.head()

In [None]:
# List every column with its number and percentage of non-null values
print("üìã COLONNES :")
total_rows = len(df_movies)
for position, name in enumerate(df_movies.columns, start=1):
    filled = df_movies[name].notna().sum()
    share = filled / total_rows * 100
    print(f"{position:2d}. {name:25} : {filled:>7,} ({share:>5.1f}%)")

In [None]:
# Full structure summary of df_movies, as printed by DataFrame.info()
df_movies.info()

In [None]:
# Descriptive statistics of df_movies.
# Kept as the cell's last expression so the notebook renders the table.
df_movies.describe()

In [None]:
# Headline figures: ratings, release years, runtimes and French-title coverage
print("üìä STATS CL√âS")
print("=" * 60)

# Ratings
print("\n‚≠ê NOTES")
ratings = df_movies['averageRating']
print(f"  Min : {ratings.min():.1f}")
print(f"  Max : {ratings.max():.1f}")
print(f"  Moyenne : {ratings.mean():.2f}")
print(f"  M√©diane : {ratings.median():.2f}")

# Release years
print("\nüìÖ ANN√âES")
years = df_movies['startYear']
print(f"  P√©riode : {years.min():.0f} - {years.max():.0f}")

# Runtimes
print("\n‚è±Ô∏è DUR√âE")
runtimes = df_movies['runtimeMinutes']
print(f"  Min : {runtimes.min():.0f} min")
print(f"  Max : {runtimes.max():.0f} min")
print(f"  Moyenne : {runtimes.mean():.0f} min")

# Share of films that have a French title
print("\nüá´üá∑ TITRES FRAN√áAIS")
fr_count = df_movies['frenchTitle'].notna().sum()
fr_share = fr_count / len(df_movies) * 100
print(f"  {fr_count:,} films ({fr_share:.1f}%)")

In [None]:
# Ten best-rated films; the final expression is what the notebook displays
print("üèÜ TOP 10 FILMS")
top10_columns = ['primaryTitle', 'frenchTitle', 'startYear', 'averageRating', 'numVotes']
df_movies.nlargest(10, 'averageRating')[top10_columns]

In [None]:
# Genre distribution: count the films flagged for each one-hot genre column
print("üé≠ GENRES")
print("="*60)

# Descriptive columns that are not genre flags.
non_genre_cols = {
    'tconst', 'primaryTitle', 'originalTitle', 'frenchTitle',
    'startYear', 'runtimeMinutes', 'genres', 'isAdult',
    'averageRating', 'numVotes', 'acteurs', 'realisateurs', 'region',
}

# Every remaining numeric (or boolean) column is treated as a genre flag.
# is_numeric_dtype replaces `dtype in [int, float]`: that comparison depends
# on the platform's default integer width and silently drops int8/uint8/bool
# dummy columns.
genre_cols = [
    col for col in df_movies.columns
    if col not in non_genre_cols and pd.api.types.is_numeric_dtype(df_movies[col])
]

# Summing a 0/1 flag column gives the number of films carrying that genre.
genre_counts = df_movies[genre_cols].sum().sort_values(ascending=False)

print(f"\nNombre de genres : {len(genre_counts)}")
print("\nTop 15 :")
for i, (genre, count) in enumerate(genre_counts.head(15).items(), 1):
    pct = count / len(df_movies) * 100
    print(f"{i:2}. {genre:20} : {count:>7,} ({pct:>5.1f}%)")

In [None]:
# Casting coverage: how many films list actors / directors, plus one example
print("üé¨ CASTING")
print("="*60)


def _has_people(value):
    """Return True when *value* is a non-empty list-like of names.

    NumPy arrays are accepted alongside lists and tuples because list columns
    read back from Parquet are typically materialised as arrays, which a plain
    ``isinstance(value, list)`` check would reject.
    """
    return isinstance(value, (list, tuple, np.ndarray)) and len(value) > 0


# Keep the boolean masks: the counts are their sums, and the directors mask is
# needed again below to draw an example row.
actors_mask = df_movies['acteurs'].apply(_has_people)
directors_mask = df_movies['realisateurs'].apply(_has_people)
has_actors = actors_mask.sum()
has_directors = directors_mask.sum()

print(f"\nFilms avec acteurs : {has_actors:,} ({has_actors/len(df_movies)*100:.1f}%)")
print(f"Films avec r√©alisateurs : {has_directors:,} ({has_directors/len(df_movies)*100:.1f}%)")

# Example: filter with the mask, not with the scalar count
# (`df_movies[has_directors > 0]` indexes with a single bool and fails).
if has_directors > 0:
    sample = df_movies[directors_mask].sample(1).iloc[0]
    print(f"\nExemple : {sample['primaryTitle']} ({sample['startYear']:.0f})")
    print(f"  R√©alisateur : {', '.join(sample['realisateurs'])}")
    if _has_people(sample['acteurs']):
        print(f"  Acteurs : {', '.join(sample['acteurs'][:5])}")
else:
    print("\nAucun exemple disponible (colonne 'realisateurs' vide)")

---

## 3️⃣ Test Recommandations

In [None]:
# Import fonctions
from utils import get_recommendations, get_personalized_recommendations
print("‚úÖ Fonctions recommandations import√©es")

In [None]:
# Test 1: recommendations for the first film whose title contains "Matrix"
print("üé¨ TEST : Films similaires √† Matrix")
print("="*60)

# Look the film up; the match may be empty on a reduced dataset.
matches = df_movies[df_movies['primaryTitle'].str.contains('Matrix', case=False, na=False)]

if matches.empty:
    # Avoid the IndexError that `.index[0]` raises when nothing matches.
    print("‚ùå Film non trouv√©")
else:
    matrix_idx = matches.index[0]
    # iloc[0] always yields one row, even if the index label is duplicated.
    matrix = matches.iloc[0]

    print(f"\nFilm : {matrix['primaryTitle']} ({matrix['startYear']:.0f})")
    print(f"Note : {matrix['averageRating']:.1f}/10")

    # Recommendations (returns the result frame and the name of the method used)
    reco_df, method = get_recommendations(df_movies, matrix_idx, n=6)

    print(f"\n‚úÖ {len(reco_df)} films similaires (m√©thode: {method})")
    print("\nüìã Recommandations :")
    for i, (idx, film) in enumerate(reco_df.iterrows(), 1):
        # Prefer the French title; fall back when it is missing or NaN.
        titre = film.get('frenchTitle')
        if pd.isna(titre):
            titre = film['primaryTitle']
        print(f"{i}. {titre} ({film['startYear']:.0f}) - {film['averageRating']:.1f}/10")

In [None]:
# Test 2: personalised recommendations from a simulated set of liked films
print("‚ú® TEST : Recommandations personnalis√©es")
print("="*60)

# Simulate user tastes: up to five films rated above 8.5.
# The sample size is capped because DataFrame.sample raises when asked for
# more rows than are available (e.g. on a reduced dataset).
top_rated = df_movies[df_movies['averageRating'] > 8.5]
liked_films = top_rated.sample(min(5, len(top_rated)))
disliked_films = []

print("\nüëç Films aim√©s :")
for idx, film in liked_films.iterrows():
    # Prefer the French title; fall back when it is missing or NaN.
    titre = film.get('frenchTitle')
    if pd.isna(titre):
        titre = film['primaryTitle']
    print(f"  - {titre}")

# Compute the recommendations
recommended = get_personalized_recommendations(df_movies, liked_films, disliked_films, top_n=10)

print(f"\n‚úÖ {len(recommended)} films recommand√©s")
print("\nüìã Top 10 :")
for i, (idx, film) in enumerate(recommended.head(10).iterrows(), 1):
    # The result may expose either the display names ('titre', 'note') or the
    # raw IMDb names, so both are tried. NOTE(review): confirm the actual
    # output schema of get_personalized_recommendations.
    titre = film.get('titre', film.get('primaryTitle', 'Sans titre'))
    annee = film.get('startYear', 0)
    note = film.get('note', film.get('averageRating', 0))
    score = film.get('score_recommandation', 0)
    print(f"{i}. {titre} ({annee:.0f}) - {note:.1f}/10 - Score: {score:.0f}%")

---

## 4️⃣ Test API TMDb

In [None]:
# Import fonctions API
from utils import search_tmdb_by_title, get_movie_details_from_tmdb, enrich_movie_with_tmdb
print("‚úÖ Fonctions API import√©es")

In [None]:
# Search test: look up "Matrix" (1999) on TMDb and print the main fields
print("üîç TEST : Recherche Matrix sur TMDb")
print("=" * 60)

result = search_tmdb_by_title("Matrix", 1999)

if not result:
    print("‚ùå Film non trouv√©")
else:
    print("\n‚úÖ Film trouv√© :")
    # (label, key in the result, suffix appended after the value)
    shown_fields = (
        ("ID", 'id', ""),
        ("Titre", 'title', ""),
        ("Date", 'release_date', ""),
        ("Note", 'vote_average', "/10"),
    )
    for label, key, suffix in shown_fields:
        print(f"  {label} : {result[key]}{suffix}")

In [None]:
# Details test: fetch and print the details of the film found by the search cell
if result:
    print("üìù TEST : D√©tails du film")
    print("="*60)

    details = get_movie_details_from_tmdb(result['id'])

    if details:
        # Synopsis and poster URL are sliced below, which raises on None.
        # NOTE(review): assumes the helper may return None/empty for these
        # fields when TMDb has no data - confirm against utils.
        synopsis = details['synopsis'] or ''
        poster_url = details['poster_url'] or 'N/A'

        print("\n‚úÖ D√©tails r√©cup√©r√©s :")
        print(f"\n  Titre : {details['title']}")
        print(f"  Synopsis : {synopsis[:150]}...")
        print(f"  R√©alisateur : {details['director']}")
        print(f"  Acteurs : {', '.join(details['cast'][:5])}")
        print(f"  Genres : {', '.join(details['genres'])}")
        print(f"  Dur√©e : {details['runtime']} min")
        print(f"  Poster : {poster_url[:80]}...")

In [None]:
# Enrichment test: enrich one random film with TMDb data and print the result
print("‚ú® TEST : Enrichissement d'un film")
print("="*60)

film_sample = df_movies.sample(1).iloc[0]
print(f"\nFilm : {film_sample['primaryTitle']} ({film_sample['startYear']:.0f})")

enriched = enrich_movie_with_tmdb(film_sample)

# `.get(key, 'N/A')` only covers a missing key; a key present with a None or
# NaN value would still break the slice below, so the type is checked here.
synopsis = enriched.get('synopsis')
if not isinstance(synopsis, str) or not synopsis:
    synopsis = 'N/A'

print("\n‚úÖ Film enrichi :")
# Fall back to the IMDb title when the enriched record carries no 'title'.
print(f"  Titre : {enriched.get('title', film_sample['primaryTitle'])}")
print(f"  Synopsis : {synopsis[:100]}...")
print(f"  R√©alisateur : {enriched.get('director') or 'N/A'}")
print(f"  Poster : {'Oui' if enriched.get('poster_url') else 'Non'}")

---

## 5️⃣ Test DataFrame Excel

In [None]:
# Load every sheet of the Excel workbook into excel_data (sheet name -> DataFrame)
print("üìä Chargement Excel")
print("="*60)

excel_data = {}
# Open the workbook once and close it deterministically: the context manager
# releases the file handle, and reading from the already-open ExcelFile avoids
# re-opening the workbook for every sheet.
with pd.ExcelFile(EXCEL_PATH) as excel_file:
    print(f"\nFeuilles : {len(excel_file.sheet_names)}")

    for sheet in excel_file.sheet_names:
        df = pd.read_excel(excel_file, sheet_name=sheet)
        excel_data[sheet] = df
        print(f"  ‚úÖ {sheet:30} : {df.shape[0]:>4} x {df.shape[1]:>2}")

In [None]:
# Inspect the "Population_creuse" sheet: column names, then the first 10 rows
print("üìã Feuille : Population_creuse")
df_pop = excel_data['Population_creuse']

pop_columns = [*df_pop.columns]
print(f"\nColonnes : {pop_columns}")
pop_preview = df_pop.head(10)
display(pop_preview)

In [None]:
# Inspect the "Diplome_creuse" sheet: column names, then the whole table
print("üìã Feuille : Diplome_creuse")
df_diplome = excel_data['Diplome_creuse']

diploma_columns = [*df_diplome.columns]
print(f"\nColonnes : {diploma_columns}")
display(df_diplome)

---

## 6️⃣ Test Gestion Utilisateurs

In [None]:
# Import
from utils import UserProfileManager
print("‚úÖ UserProfileManager import√©")

In [None]:
# End-to-end test of UserProfileManager against a throwaway profile file
print("üë§ TEST : Gestion utilisateurs")
print("="*60)

test_path = PROJECT_ROOT / "app" / "user_profiles_test.json"

# Start from a clean slate: a file left behind by an earlier interrupted run
# would otherwise leak its state into this test.
if test_path.exists():
    test_path.unlink()

try:
    manager = UserProfileManager(test_path)

    # 1. Create a user
    print("\n1. Cr√©ation utilisateur...")
    success, msg = manager.create_user("test", "1234")
    print(f"  {msg}")

    # 2. Authenticate
    print("\n2. Authentification...")
    auth = manager.authenticate("test", "1234")
    print(f"  {'‚úÖ OK' if auth else '‚ùå KO'}")

    # 3. Add a liked film (one random row of df_movies)
    print("\n3. Ajout film aim√©...")
    test_film = df_movies.sample(1).iloc[0]
    manager.add_film("test", test_film, "liked")
    print(f"  Film : {test_film['primaryTitle']}")

    # 4. Read the liked films back
    print("\n4. R√©cup√©ration...")
    liked = manager.get_liked_films("test")
    print(f"  {len(liked)} film(s) aim√©(s)")
finally:
    # Always remove the throwaway file, even when a step above raised.
    if test_path.exists():
        test_path.unlink()

print("\n‚úÖ Test termin√© (fichier nettoy√©)")

---

## 7️⃣ Playground - Zone de Test Libre

In [None]:
# Free cell 1
# Example: search by title (original or French), case-insensitive

query = "inception"  # <- change this value

# regex=False makes the query a literal substring: titles or queries that
# contain regex metacharacters ("(", "?", "+", ...) would otherwise raise or
# match the wrong rows.
results = df_movies[
    df_movies['primaryTitle'].str.contains(query, case=False, na=False, regex=False) |
    df_movies['frenchTitle'].str.contains(query, case=False, na=False, regex=False)
]

print(f"R√©sultats : {len(results)}")
# Last expression of the cell: rendered by the notebook.
results[['primaryTitle', 'frenchTitle', 'startYear', 'averageRating']].head(10)

In [None]:
# Free cell 2
# Example: films featuring a given actor, best rated first

acteur = "Tom Hanks"  # <- change this value

# NumPy arrays are accepted alongside lists and tuples: list columns read back
# from Parquet are typically materialised as arrays, which a plain
# `isinstance(x, list)` test rejects (leaving the result always empty).
films = df_movies[
    df_movies['acteurs'].apply(
        lambda x: bool(isinstance(x, (list, tuple, np.ndarray)) and acteur in x)
    )
]

print(f"Films avec {acteur} : {len(films)}")
# Last expression of the cell: rendered by the notebook.
films[['primaryTitle', 'frenchTitle', 'startYear', 'averageRating']].sort_values('averageRating', ascending=False).head(10)

In [None]:
# Free cell 3
# Example: ten best-rated films of one genre (genre = one-hot column name)

genre = "Drama"  # <- change this value

if genre not in df_movies.columns:
    print(f"Genre '{genre}' non trouv√©")
else:
    # Rows whose flag for this genre equals 1
    films_genre = df_movies[df_movies[genre] == 1]
    print(f"Films {genre} : {len(films_genre):,}")

    shown_columns = ['primaryTitle', 'frenchTitle', 'startYear', 'averageRating']
    top_genre = films_genre.nlargest(10, 'averageRating')[shown_columns]
    display(top_genre)

In [None]:
# Cellule libre 4
# Ton code ici !


In [None]:
# Cellule libre 5
# Ton code ici !


---

## 📝 Notes

**Commandes utiles :**
- `df.head()` / `df.tail()` - Aperçu
- `df.info()` - Structure
- `df.describe()` - Stats
- `df.sample(n)` - Échantillon aléatoire
- `df.query('...')` - Filtrage SQL-like
- `df.groupby()` - Agrégation
- `df.sort_values()` - Tri

**Bon test ! 🧪**