In [1]:
# Install required library
!pip install pandas fuzzywuzzy python-Levenshtein

# Import libraries
import pandas as pd
from fuzzywuzzy import process, fuzz

# Step 1: Create a Sample Dataset
# Six rows; the Name and City columns include spelling variants
# ('Sherya', 'Mumabi', ...) and one Age entry is missing (None).
data = dict(
    Name=['Shreya', 'Sherya', 'Shreeya', 'Shrea', 'Pranit', 'Pranith'],
    City=['Mumbai', 'Mumabi', 'Pune', 'Delhi', 'Pune', 'Pune'],
    Age=[22, 21, 23, None, 24, 25],
)
df = pd.DataFrame(data)
print("Original Data:\n", df)

# Step 2: Data Slicing & Indexing
# Positional slice of the leading rows, then a single-cell lookup by
# row label and column name.
print("\nSlice: First 5 rows")
first_rows = df.iloc[:5]
print(first_rows)
name_at_row_4 = df.at[4, 'Name']
print("\nIndex specific value (Name at index 4):", name_at_row_4)

# Step 3: Manipulation
# One label per row: the first four rows get 'F', the last two get 'M'.
gender_labels = ['F'] * 4 + ['M'] * 2
df['Gender'] = gender_labels
print("\nAfter adding new column:\n", df)

# Step 4: Data Cleaning using Fuzzy Logic
def fuzzy_clean(value, choices, threshold=80):
    """Replace ``value`` with its closest entry in ``choices`` when similar enough.

    Args:
        value: The cell to clean. Anything that is not a ``str`` (for example
            ``None`` or a float NaN from a missing cell) is returned unchanged,
            because it cannot be fuzzy-matched.
        choices: Candidate canonical spellings, passed straight to
            ``process.extractOne``.
        threshold: Minimum score (inclusive) the best candidate must reach
            for the replacement to happen. Defaults to 80.

    Returns:
        The best-matching choice if its score is at least ``threshold``;
        otherwise ``value`` itself.
    """
    # Missing / non-text cells would make the matcher's string processing fail.
    if not isinstance(value, str):
        return value

    best = process.extractOne(value, choices)
    # extractOne yields None when there is nothing to compare against
    # (e.g. an empty choices list); keep the original value in that case.
    if best is None:
        return value

    # Index instead of tuple-unpacking: for dict-like choices the result
    # carries a third element (the key) after match and score.
    match, score = best[0], best[1]
    return match if score >= threshold else value

# Clean City and Name columns
# Series.apply forwards the extra positional argument (the list of accepted
# spellings) to fuzzy_clean for every cell; the default threshold is used.
city_choices = ['Mumbai', 'Pune', 'Delhi']
name_choices = ['Shreya', 'Pranit']
df['City_Cleaned'] = df['City'].apply(fuzzy_clean, args=(city_choices,))
df['Name_Cleaned'] = df['Name'].apply(fuzzy_clean, args=(name_choices,))

# Handle missing Age values by substituting the mean of the known ages
mean_age = df['Age'].mean()
df['Age'] = df['Age'].fillna(mean_age)

print("\nCleaned Data:\n", df)

# Step 5: Fuzzy Union and Intersection Example
# Second, smaller table whose names are compared against the cleaned names.
data2 = {
    'Name': ['Sherya', 'Pranit', 'Ravi'],
    'City': ['Mumbai', 'Pune', 'Delhi'],
}
df2 = pd.DataFrame.from_dict(data2)
print("\nSecond Dataset:\n", df2)

# --- Fuzzy Intersection ---
# A cleaned name from df belongs to the intersection when at least one name
# in df2 has a similarity ratio above 80.
# dict.fromkeys drops repeated names while keeping first-seen order, so the
# printed list is the same on every run (list(set(...)) ordering depends on
# per-process string hash randomization).
intersection = []
for n1 in dict.fromkeys(df['Name_Cleaned']):
    if any(fuzz.ratio(n1, n2) > 80 for n2 in df2['Name']):
        intersection.append(n1)
print("\nFuzzy Intersection:", intersection)

# --- Fuzzy Union ---
# Start from the distinct cleaned names of df (first-seen order); seeding with
# the raw column would carry every repeated row value into the "union".
union = list(dict.fromkeys(df['Name_Cleaned']))
# Add a df2 name only if nothing already in the union is a fuzzy match
# (similarity ratio above 80).
for n2 in df2['Name']:
    if not any(fuzz.ratio(n2, u) > 80 for u in union):
        union.append(n2)
print("Fuzzy Union:", union)


Collecting fuzzywuzzy
  Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl.metadata (4.9 kB)
Collecting python-Levenshtein
  Downloading python_levenshtein-0.27.3-py3-none-any.whl.metadata (3.9 kB)
Collecting Levenshtein==0.27.3 (from python-Levenshtein)
  Downloading levenshtein-0.27.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (3.7 kB)
Collecting rapidfuzz<4.0.0,>=3.9.0 (from Levenshtein==0.27.3->python-Levenshtein)
  Downloading rapidfuzz-3.14.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (12 kB)
Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl (18 kB)
Downloading python_levenshtein-0.27.3-py3-none-any.whl (9.5 kB)
Downloading levenshtein-0.27.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl (153 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m153.3/153.3 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading rapidfuzz-3.14.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (3.2 M