In [1]:
import pandas as pd
import matplotlib.pyplot as plt

# --- Step 1: Load data ---
df = pd.read_csv("data/song_lyrics.csv")

# --- Step 2: Basic info ---
print("Shape:", df.shape)
print("\nColumn info:")
print(df.info())

# --- Step 3: Preview first rows ---
display(df.head())

# --- Step 4: Check for nulls ---
print("\nMissing values per column:")
print(df.isnull().sum())

# --- Step 5: Unique values per column ---
for col in df.columns:
    nunique = df[col].nunique()
    print(f"\n--- {col} ---")
    print("Unique values:", nunique)
    
    # If categorical or text-like
    if df[col].dtype == "object" or nunique < 50:
        value_counts = df[col].value_counts(dropna=False)
        print("\nMost popular 5:")
        print(value_counts.head(5))
        print("\nLeast popular 5:")
        print(value_counts.tail(5))
    else:
        print("Numeric column summary:")
        print(df[col].describe())




Shape: (5134856, 11)

Column info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5134856 entries, 0 to 5134855
Data columns (total 11 columns):
 #   Column         Dtype 
---  ------         ----- 
 0   title          object
 1   tag            object
 2   artist         object
 3   year           int64 
 4   views          int64 
 5   features       object
 6   lyrics         object
 7   id             int64 
 8   language_cld3  object
 9   language_ft    object
 10  language       object
dtypes: int64(3), object(8)
memory usage: 430.9+ MB
None


Unnamed: 0,title,tag,artist,year,views,features,lyrics,id,language_cld3,language_ft,language
0,Killa Cam,rap,Cam'ron,2004,173166,"{""Cam\\'ron"",""Opera Steve""}","[Chorus: Opera Steve & Cam'ron]\nKilla Cam, Ki...",1,en,en,en
1,Can I Live,rap,JAY-Z,1996,468624,{},"[Produced by Irv Gotti]\n\n[Intro]\nYeah, hah,...",3,en,en,en
2,Forgive Me Father,rap,Fabolous,2003,4743,{},Maybe cause I'm eatin\nAnd these bastards fien...,4,en,en,en
3,Down and Out,rap,Cam'ron,2004,144404,"{""Cam\\'ron"",""Kanye West"",""Syleena Johnson""}",[Produced by Kanye West and Brian Miller]\n\n[...,5,en,en,en
4,Fly In,rap,Lil Wayne,2005,78271,{},"[Intro]\nSo they ask me\n""Young boy\nWhat you ...",6,en,en,en



Missing values per column:
title               188
tag                   0
artist                0
year                  0
views                 0
features              0
lyrics                0
id                    0
language_cld3     90966
language_ft      134322
language         226918
dtype: int64

--- title ---
Unique values: 3093216

Most popular 5:
title
Intro    6072
Home     1826
Alone    1617
Outro    1506
You      1370
Name: count, dtype: int64

Least popular 5:
title
-10 Kg Di Rabbia                   1
The Streets Of Surrender S.O.S.    1
Summer Seduction                   1
Só não quero                       1
Des terres dAfrique                1
Name: count, dtype: int64

--- tag ---
Unique values: 6

Most popular 5:
tag
pop     2138587
rap     1724816
rock     793220
rb       196462
misc     181455
Name: count, dtype: int64

Least popular 5:
tag
rap        1724816
rock        793220
rb          196462
misc        181455
country     100316
Name: count, dtype: int64

--