In [18]:
import pandas as pd
import random
import string
import numpy as np

# Create synthetic user data
random.seed(42)
n = 100  # Number of users
data = {
    'User_ID': range(1, n+1),
    'Name': [''.join(random.choices(string.ascii_uppercase, k=5)) for _ in range(n)],
    'Age': [random.randint(18, 60) for _ in range(n)],
    'Gender': random.choices(['Male', 'Female'], k=n),
    'City': random.choices(['New York', 'Los Angeles', 'Chicago', 'San Francisco', 'Miami'], k=n)
}

df = pd.DataFrame(data)
display(df.head(5))

Unnamed: 0,User_ID,Name,Age,Gender,City
0,1,QAHFT,57,Female,New York
1,2,RXCKA,52,Female,Miami
2,3,FNAFQ,19,Female,New York
3,4,OFPVA,43,Male,San Francisco
4,5,USIEY,55,Female,San Francisco


In [26]:
# Filtering using Direct Logic
filtered_direct = df[(df['Age'] < 30) & (df['City'] == 'New York')]
display(filtered_direct.sample(5))

Unnamed: 0,User_ID,Name,Age,Gender,City
2,3,FNAFQ,19,Female,New York
67,68,WFVLH,22,Male,New York
28,29,MOGWL,19,Female,New York
94,95,KOKFK,27,Male,New York
76,77,SHIKK,20,Female,New York


In [27]:
# Filtering using between
filtered_between = df[df['Age'].between(25, 35)]
display(filtered_between.sample(5))

Unnamed: 0,User_ID,Name,Age,Gender,City
99,100,RKEMD,35,Female,New York
44,45,ZPOTB,26,Male,San Francisco
25,26,IAYWV,34,Male,Los Angeles
47,48,DXGPQ,25,Female,San Francisco
49,50,SGKRH,31,Female,Chicago


In [25]:

# Filtering using DataFrame.where
filtered_where = df.where((df['Age'] < 30) & (df['City'] == 'New York')).dropna()
display(filtered_where.sample(5))


Unnamed: 0,User_ID,Name,Age,Gender,City
76,77.0,SHIKK,20.0,Female,New York
28,29.0,MOGWL,19.0,Female,New York
94,95.0,KOKFK,27.0,Male,New York
13,14.0,UFAIG,29.0,Female,New York
26,27.0,HBWYC,23.0,Male,New York


In [24]:
# Creating label coloumn using DataFrame.where
filtered_where['Age Group'] = np.where((filtered_where['Age'] < 20), 'Junior', 'Adulthood')
display(filtered_where.sample(5))

Unnamed: 0,User_ID,Name,Age,Gender,City,Age Group
26,27.0,HBWYC,23.0,Male,New York,Adulthood
94,95.0,KOKFK,27.0,Male,New York,Adulthood
28,29.0,MOGWL,19.0,Female,New York,Junior
67,68.0,WFVLH,22.0,Male,New York,Adulthood
13,14.0,UFAIG,29.0,Female,New York,Adulthood


In [28]:
# Filtering using contains
filtered_contains = df[df['Name'].str.contains('A')]
display(filtered_contains.sample(5))

Unnamed: 0,User_ID,Name,Age,Gender,City
59,60,OLABW,24,Female,Miami
73,74,OGSAY,56,Male,New York
3,4,OFPVA,43,Male,San Francisco
68,69,UFAFI,55,Female,Chicago
64,65,WZVWA,42,Male,Chicago


In [29]:
# Filtering using isin
cities_to_filter = ['New York', 'Los Angeles']
filtered_isin = df[df['City'].isin(cities_to_filter)]
display(filtered_isin.sample(5))

Unnamed: 0,User_ID,Name,Age,Gender,City
25,26,IAYWV,34,Male,Los Angeles
28,29,MOGWL,19,Female,New York
73,74,OGSAY,56,Male,New York
23,24,WHQPD,35,Female,New York
14,15,FYWIR,21,Male,Los Angeles


In [37]:
# Filtering using groupby.filter
filtered_groupby = df.groupby('City').filter(lambda x: len(x) > 20)
display(filtered_groupby)


Filtered using groupby.filter:


Unnamed: 0,User_ID,Name,Age,Gender,City
0,1,QAHFT,57,Female,New York
2,3,FNAFQ,19,Female,New York
6,7,USNZJ,60,Male,New York
8,9,SBFHC,23,Female,Chicago
9,10,GCHQJ,59,Male,New York
10,11,JFGYQ,45,Male,Chicago
12,13,ZQORV,47,Female,Chicago
13,14,UFAIG,29,Female,New York
17,18,FZNCB,38,Male,New York
21,22,QCLLY,39,Female,Chicago


In [42]:
display(df['City'].value_counts().reset_index())

Unnamed: 0,index,City
0,Chicago,26
1,New York,25
2,San Francisco,17
3,Los Angeles,17
4,Miami,15


In [43]:
# Filtering using apply function
def custom_filter(row):
    return row['Age'] < 30 and row['City'] == 'New York'

filtered_apply = df[df.apply(custom_filter, axis=1)]
display(filtered_apply.sample(5))


Unnamed: 0,User_ID,Name,Age,Gender,City
67,68,WFVLH,22,Male,New York
26,27,HBWYC,23,Male,New York
74,75,NSTRJ,25,Male,New York
13,14,UFAIG,29,Female,New York
76,77,SHIKK,20,Female,New York


In [44]:
# Filtering using query
filtered_query = df.query('Age < 30 and City == "New York"')
display(filtered_query.head())

Unnamed: 0,User_ID,Name,Age,Gender,City
2,3,FNAFQ,19,Female,New York
13,14,UFAIG,29,Female,New York
26,27,HBWYC,23,Male,New York
28,29,MOGWL,19,Female,New York
67,68,WFVLH,22,Male,New York
