In [2]:
from pathlib import Path

import pandas as pd

In [3]:
# 1. Load the movie metadata CSV.
# The dataset folder is named once here so the notebook can be pointed at
# another copy of the data by editing a single line. The default is the same
# absolute Windows location this cell has always read from (it is NOT the
# current working directory).
DATA_DIR = Path(r"E:\DSBDAL\DSBDALExam DataSets\DSBDALExam DataSets\Movie")

df = pd.read_csv(
    DATA_DIR / "movies_metadata.csv",
    # Only these four columns are loaded; the rest of the file is skipped.
    usecols=["title", "original_language", "vote_average", "popularity"],
    # Per the pandas docs, False avoids chunked parsing and the mixed-type
    # inference it can cause, at the cost of more memory while reading.
    low_memory=False,
)

In [4]:
# m. Split the frame into per-language subsets.
# One boolean mask per language code selects the matching rows; rows whose
# language differs from the code (or is missing) are left out.
language_codes = df["original_language"]
df_en = df.loc[language_codes.eq("en")]
df_fr = df.loc[language_codes.eq("fr")]

In [5]:
# Report (rows, columns) for each language subset.
for label, subset in (("English films:", df_en), ("French films:", df_fr)):
    print(label, subset.shape)

English films: (32269, 4)
French films: (2438, 4)


In [6]:
# n. Merge the two subsets (English + French).
# Rows are stacked English-first, then French, and the result is renumbered
# from 0 instead of carrying over the row labels of the source frame.
language_subsets = [df_en, df_fr]
df_merged = pd.concat(language_subsets, ignore_index=True)

In [7]:
# Report (rows, columns) of the combined English + French frame.
merged_shape = df_merged.shape
print("Merged EN+FR:", merged_shape)

Merged EN+FR: (34707, 4)


In [8]:
# o. Order the whole dataset by customer rating (vote_average), highest first,
# then renumber the rows from 0 so that position 0 is the top-rated title.
ranked = df.sort_values(by="vote_average", ascending=False)
df_sorted = ranked.reset_index(drop=True)

In [9]:
# Show the title and rating of the three highest-rated rows.
top_rated = df_sorted[["title", "vote_average"]].head(3)
print("\nTop 3 by rating:\n", top_rated)


Top 3 by rating:
                                    title  vote_average
0                              Time Pass          10.0
1  LEGO DC Super Hero Girls: Brain Drain          10.0
2                      Stealing a Nation          10.0


In [10]:
# p. Transpose the first 5 rows: each of those movies becomes a column and
# each original column (language, popularity, title, rating) becomes a row.
first_five = df.iloc[:5]
df_transposed = first_five.transpose()

In [11]:
# Display the transposed five-movie sample.
transposed_header = "\nTransposed sample:\n"
print(transposed_header, df_transposed)


Transposed sample:
                            0          1                 2                  3  \
original_language         en         en                en                 en   
popularity         21.946943  17.015539           11.7129           3.859495   
title              Toy Story    Jumanji  Grumpier Old Men  Waiting to Exhale   
vote_average             7.7        6.9               6.5                6.1   

                                             4  
original_language                           en  
popularity                            8.387519  
title              Father of the Bride Part II  
vote_average                               5.7  


In [12]:
# q. Melt to long format.
# RowID is a copy of the row index turned into a regular column, so every
# long-format row can be traced to its source row and pivoted back later.
df_long = df.rename_axis("RowID").reset_index()

# RowID and original_language are kept as identifier columns; every remaining
# column is stacked into (metric, value) pairs, one row per original cell.
df_melted = df_long.melt(
    id_vars=["RowID", "original_language"],
    var_name="metric",
    value_name="value",
)

In [13]:
# Preview the first rows of the long-format frame.
melted_preview = df_melted.head()
print("\nMelted sample:\n", melted_preview)


Melted sample:
    RowID original_language      metric      value
0      0                en  popularity  21.946943
1      1                en  popularity  17.015539
2      2                en  popularity    11.7129
3      3                en  popularity   3.859495
4      4                en  popularity   8.387519


In [14]:
# r. Pivot the long frame back to wide: one row per RowID, one column per
# metric name, filled from the `value` column.
# NOTE(review): original_language was an identifier column in the melt but is
# not part of this pivot, so df_wide holds only the metric columns.
by_row = df_melted.pivot(index="RowID", columns="metric", values="value")

# RowID has served its purpose; swap it for a plain 0-based index.
df_wide = by_row.reset_index(drop=True)

In [15]:
# Preview the first rows of the re-widened frame.
wide_preview = df_wide.head()
print("\nWide sample:\n", wide_preview)


Wide sample:
 metric popularity                        title vote_average
0       21.946943                    Toy Story          7.7
1       17.015539                      Jumanji          6.9
2         11.7129             Grumpier Old Men          6.5
3        3.859495            Waiting to Exhale          6.1
4        8.387519  Father of the Bride Part II          5.7
