# Day3_Python_Pandas_Basics.ipynb
# Content: Python Refresher + Pandas Basics (IMDB dataset)

## Step 1: Import Libraries

In [7]:
import pandas as pd
import numpy as np

## Step 2: Python Refresher


In [8]:
# Variables
x = 10
name = "Maroof"
print("x:", x, "name:", name)

# List
numbers = [1, 2, 3, 4, 5]
print("Numbers:", numbers)

# Dictionary
student = {"name": "Maroof", "age": 20}
print("Student dict:", student)

# Loop
print("Loop example:")
for i in range(5):
    print(i)

# Function
def add(a, b):
    return a + b

print("Add 2 + 3 =", add(2,3))

x: 10 name: Maroof
Numbers: [1, 2, 3, 4, 5]
Student dict: {'name': 'Maroof', 'age': 20}
Loop example:
0
1
2
3
4
Add 2 + 3 = 5


## Step 3: Pandas Basics

In [9]:
# Create Series
s = pd.Series([10, 20, 30, 40])
print("Series:\n", s)

# Create DataFrame
df = pd.DataFrame({
    "Name": ["Alice", "Bob", "Charlie"],
    "Age": [25, 30, 22],
    "Rating": [8.5, 7.2, 9.0]
})
df

Series:
 0    10
1    20
2    30
3    40
dtype: int64


Unnamed: 0,Name,Age,Rating
0,Alice,25,8.5
1,Bob,30,7.2
2,Charlie,22,9.0


## Step 4: Load IMDB Dataset

In [14]:
# Replace with your dataset path
imdb_df = pd.read_csv("/content/IMDB Dataset.csv")

# Inspect dataset
print("\nFirst 5 rows:\n", imdb_df.head())
print("\nShape:", imdb_df.shape)
print("\nColumns:", imdb_df.columns)
print("\nInfo:")
print(imdb_df.info())
print("\nSummary Statistics:\n", imdb_df.describe())


First 5 rows:
                                               review sentiment
0  One of the other reviewers has mentioned that ...  positive
1  A wonderful little production. <br /><br />The...  positive
2  I thought this was a wonderful way to spend ti...  positive
3  Basically there's a family where a little boy ...  negative
4  Petter Mattei's "Love in the Time of Money" is...  positive

Shape: (50000, 2)

Columns: Index(['review', 'sentiment'], dtype='object')

Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50000 entries, 0 to 49999
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   review     50000 non-null  object
 1   sentiment  50000 non-null  object
dtypes: object(2)
memory usage: 781.4+ KB
None

Summary Statistics:
                                                    review sentiment
count                                               50000     50000
unique                                              49

## Step 5: Selecting Data

In [17]:
# Select a column
ratings = imdb_df["review"]
print("\Review column:\n", ratings.head())

# Select rows
first_5_rows = imdb_df.iloc[:5]
print("\nFirst 5 rows:\n", first_5_rows)

\Review column:
 0    One of the other reviewers has mentioned that ...
1    A wonderful little production. <br /><br />The...
2    I thought this was a wonderful way to spend ti...
3    Basically there's a family where a little boy ...
4    Petter Mattei's "Love in the Time of Money" is...
Name: review, dtype: object

First 5 rows:
                                               review sentiment
0  One of the other reviewers has mentioned that ...  positive
1  A wonderful little production. <br /><br />The...  positive
2  I thought this was a wonderful way to spend ti...  positive
3  Basically there's a family where a little boy ...  negative
4  Petter Mattei's "Love in the Time of Money" is...  positive


  print("\Review column:\n", ratings.head())


## Step 6: Practice Tasks


In [21]:
# Task 1: Find missing values
missing_values = imdb_df.isnull().sum()
print("\nMissing values per column:\n", missing_values)

# Task 2: Top 10 highest rated movies
top_10 = imdb_df.sort_values("review", ascending=False).head(10)
print("\nTop 10 highest rated movies:\n", top_10[["review", "sentiment"]])



Missing values per column:
 review       0
sentiment    0
dtype: int64

Top 10 highest rated movies:
                                                   review sentiment
41804  ý thýnk uzak ýs the one of the best films of a...  positive
36833      Film auteur Stephan Woloszczuk explores th...  positive
34432  ~~I was able to see this movie yesterday morni...  positive
561    {rant start} I didn't want to believe them at ...  negative
13602  {Possible spoilers coming up... you've been fo...  positive
24420  zero day is based of columbine high school mas...  positive
45096  you will likely be sorely disappointed by this...  negative
38558  you must be seeing my comments over many films...  negative
28224  you know, i always fancy disturbing or strange...  negative
8379   you know I've seen a lot of crappy hong kong m...  positive
