<a href="https://colab.research.google.com/github/stu4-A/OOP-assignment/blob/main/Assignment1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
import pandas as pd

# Read the raw Netflix catalogue and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='/content/netflix_titles.csv')
display(df.head(n=5))

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...


In [11]:
import pandas as pd
import numpy as np
from collections import defaultdict, Counter
from typing import List, Dict, Set

class NetflixContent:
    """One Netflix catalogue entry (movie or TV show) with simple predicates.

    Missing values (``None`` / NaN) in the optional text fields are stored as
    empty strings, a missing or unparseable release year is stored as ``0``,
    and ``listed_in`` is split on commas into the ``genres`` list.
    """

    def __init__(self, show_id: str, show_type: str, title: str, director: str,
                 cast: str, country: str, date_added: str, release_year: int,
                 rating: str, duration: str, listed_in: str, description: str):
        """Store the fields of one catalogue row, normalising missing values.

        Args:
            show_id, show_type, title: stored unchanged (``show_type`` is kept
                as the ``type`` attribute).
            director, cast, country, date_added, rating, duration,
            description: stored as given, or ``""`` when missing.
            release_year: whole non-negative number (int, float such as
                ``2020.0``, or numeric string); anything else becomes ``0``.
            listed_in: comma-separated genre names; split into ``genres``.
        """
        self.show_id = show_id
        self.type = show_type
        self.title = title
        self.director = self._text(director)
        self.cast = self._text(cast)
        self.country = self._text(country)
        self.date_added = self._text(date_added)
        self.release_year = self._parse_year(release_year)
        self.rating = self._text(rating)
        self.duration = self._text(duration)
        # str() guards against a non-string cell; blank pieces are dropped.
        genres_str = str(self._text(listed_in))
        self.genres = [genre.strip() for genre in genres_str.split(',') if genre.strip()]
        self.description = self._text(description)

    @staticmethod
    def _text(value):
        """Return ``value`` unchanged, or ``""`` when it is None/NaN."""
        return value if pd.notna(value) else ""

    @staticmethod
    def _parse_year(value) -> int:
        """Convert ``value`` to a non-negative whole year, or ``0`` if invalid.

        Floats with no fractional part are accepted because pandas reads an
        integer column as float64 as soon as it contains a missing value.
        """
        if isinstance(value, bool) or pd.isna(value):
            return 0
        try:
            year = float(value)
        except (TypeError, ValueError):
            return 0
        # Rejects negatives, fractions, NaN and infinities in one check.
        if year < 0 or not year.is_integer():
            return 0
        return int(year)

    def is_family_friendly(self) -> bool:
        """Return True when the rating is one of the family-friendly ratings."""
        family_ratings = {'G', 'PG', 'TV-G', 'TV-PG', 'TV-Y', 'TV-Y7', 'TV-Y7-FV'}
        return self.rating in family_ratings

    def is_recent(self, threshold_year: int = 2015) -> bool:
        """Return True when the show was released in or after ``threshold_year``."""
        return self.release_year >= threshold_year

    def has_genre(self, genre: str) -> bool:
        """Return True when ``genre`` is a case-insensitive substring of any genre."""
        needle = genre.lower()
        return any(needle in g.lower() for g in self.genres)

    def __str__(self):
        return f"{self.title} ({self.release_year}) - {self.type} - {self.rating}"

class NetflixAnalyzer:
    """Main analyzer class for Netflix content data.

    Keeps the raw pandas DataFrame (``df``), one ``NetflixContent`` object per
    row (``shows``) and running tallies per genre, rating and release year.
    Each ``load_*`` call replaces the previously loaded data, so ``df``,
    ``shows`` and the tallies always describe the same dataset.
    """

    def __init__(self):
        self.shows: List["NetflixContent"] = []
        self.df: "pd.DataFrame | None" = None  # set by the load_* methods
        self.genre_stats = defaultdict(int)
        self.rating_stats = defaultdict(int)
        self.year_stats = defaultdict(int)

    def load_data_from_csv(self, csv_path: str):
        """Load Netflix data from a CSV file using pandas.

        Prints the record count, column names and shape, then rebuilds
        ``shows`` and the statistics from the new frame.
        """
        self.df = pd.read_csv(csv_path)
        print(f"Loaded {len(self.df)} records from {csv_path}")
        print("\nDataset Info:")
        print(f"Columns: {list(self.df.columns)}")
        print(f"Shape: {self.df.shape}")
        self._rebuild_shows()

    def load_data_from_dataframe(self, df: pd.DataFrame):
        """Load Netflix data from an existing pandas DataFrame.

        The frame is copied, so the caller's object is not shared.
        """
        self.df = df.copy()
        print(f"Loaded {len(self.df)} records from DataFrame")
        self._rebuild_shows()

    def _rebuild_shows(self):
        """Recreate ``shows`` and all tallies from the current ``self.df``."""
        # Start from scratch so a second load does not double-count.
        self.shows = []
        self.genre_stats = defaultdict(int)
        self.rating_stats = defaultdict(int)
        self.year_stats = defaultdict(int)

        # One plain dict per row; .get() supplies defaults for absent columns.
        for record in self.df.to_dict(orient='records'):
            show = NetflixContent(
                show_id=record.get('show_id', ''),
                show_type=record.get('type', ''),
                title=record.get('title', ''),
                director=record.get('director', ''),
                cast=record.get('cast', ''),
                country=record.get('country', ''),
                date_added=record.get('date_added', ''),
                release_year=record.get('release_year', 0),
                rating=record.get('rating', ''),
                duration=record.get('duration', ''),
                listed_in=record.get('listed_in', ''),
                description=record.get('description', '')
            )
            self.shows.append(show)
            self._update_stats(show)

    def get_dataframe_summary(self):
        """Print shape, column info, missing values and describe() output.

        Returns:
            ``self.df.head()``, or ``None`` (after printing a hint) when no
            DataFrame has been loaded.
        """
        if self.df is None:
            print("No DataFrame loaded. Use load_data_from_csv() or load_data_from_dataframe() first.")
            return None

        print("=== PANDAS DATAFRAME SUMMARY ===")
        print("\nDataset Shape:", self.df.shape)
        print("\nColumn Info:")
        # info() writes its report itself and returns None, so it must not be
        # wrapped in print() (that would emit a stray "None" line).
        self.df.info()
        print("\nMissing Values:")
        print(self.df.isnull().sum())
        print("\nBasic Statistics:")
        print(self.df.describe(include='all'))
        return self.df.head()

    def _update_stats(self, show: "NetflixContent"):
        """Add one show to the genre, rating and release-year tallies."""
        for genre in show.genres:
            self.genre_stats[genre] += 1
        self.rating_stats[show.rating] += 1
        self.year_stats[show.release_year] += 1

    def get_family_friendly_content(self) -> List["NetflixContent"]:
        """Return the shows whose ``is_family_friendly()`` is true."""
        return [show for show in self.shows if show.is_family_friendly()]

    def get_content_by_genre(self, target_genre: str) -> List["NetflixContent"]:
        """Return the shows for which ``has_genre(target_genre)`` is true."""
        return [show for show in self.shows if show.has_genre(target_genre)]

    def analyze_genres(self) -> Dict[str, int]:
        """Return a plain-dict copy of the per-genre counts."""
        return dict(self.genre_stats)

    def analyze_ratings(self) -> Dict[str, int]:
        """Return a plain-dict copy of the per-rating counts."""
        return dict(self.rating_stats)

    def analyze_release_years(self) -> Dict[int, int]:
        """Return a plain-dict copy of the per-release-year counts."""
        return dict(self.year_stats)

    def get_content_by_decade(self) -> Dict[str, List["NetflixContent"]]:
        """Group shows by decade label (e.g. ``"1990s"``).

        Shows whose ``release_year`` is not positive are skipped.
        """
        decades = defaultdict(list)

        for show in self.shows:
            if show.release_year > 0:
                decade = f"{(show.release_year // 10) * 10}s"
                decades[decade].append(show)

        return dict(decades)

    def pandas_analysis(self):
        """Print and return distributions computed directly on ``self.df``.

        Returns:
            A dict with keys ``type_distribution``, ``country_distribution``
            (top 10), ``year_stats`` and ``rating_distribution``, or ``None``
            when no DataFrame is loaded.
        """
        if self.df is None:
            print("No DataFrame available for pandas analysis.")
            return None

        print("\n=== PANDAS-BASED ANALYSIS ===")

        # Content type distribution
        print("\nContent Type Distribution:")
        type_counts = self.df['type'].value_counts()
        print(type_counts)

        # Top countries
        print("\nTop 10 Countries (by content count):")
        country_counts = self.df['country'].dropna().value_counts().head(10)
        print(country_counts)

        # Release year trends
        print("\nRelease Year Statistics:")
        year_stats = self.df['release_year'].describe()
        print(year_stats)

        # Rating distribution
        print("\nRating Distribution:")
        rating_counts = self.df['rating'].value_counts()
        print(rating_counts)

        return {
            'type_distribution': type_counts,
            'country_distribution': country_counts,
            'year_stats': year_stats,
            'rating_distribution': rating_counts
        }

def analyze_netflix_data_from_csv(csv_path: str):
    """Load a Netflix CSV and print a full text report about its content.

    The report covers the DataFrame summary, totals per content type, the top
    15 genres, the rating distribution, the release-year span, family-friendly
    titles, counts per decade and the pandas-based distributions.

    Args:
        csv_path: path of the CSV file handed to ``NetflixAnalyzer``.

    Returns:
        ``(analyzer, df_head)``: the populated ``NetflixAnalyzer`` and the
        value returned by its ``get_dataframe_summary()``.
    """
    analyzer = NetflixAnalyzer()
    analyzer.load_data_from_csv(csv_path)

    # Display DataFrame summary
    df_head = analyzer.get_dataframe_summary()

    print("\n" + "="*60)
    print("=== NETFLIX CONTENT ANALYSIS ===\n")

    # Basic statistics: one pass over the shows; absent types count as 0.
    total_shows = len(analyzer.shows)
    type_counts = Counter(show.type for show in analyzer.shows)
    movies = type_counts["Movie"]
    tv_shows = type_counts["TV Show"]

    print(f"Total Content: {total_shows}")
    print(f"Movies: {movies}")
    print(f"TV Shows: {tv_shows}")
    print("-" * 40)

    # Genre Analysis
    print("\n=== GENRE ANALYSIS ===")
    genre_stats = analyzer.analyze_genres()
    sorted_genres = sorted(genre_stats.items(), key=lambda x: x[1], reverse=True)

    print("Top 15 Genres:")
    for i, (genre, count) in enumerate(sorted_genres[:15], 1):
        print(f"{i:2d}. {genre:<30} {count:4d} shows")

    # Rating Analysis
    print("\n=== RATING ANALYSIS ===")
    rating_stats = analyzer.analyze_ratings()
    sorted_ratings = sorted(rating_stats.items(), key=lambda x: x[1], reverse=True)

    for rating, count in sorted_ratings:
        if rating:  # Skip empty ratings
            print(f"{rating:<12} {count:4d} shows")

    # Release Year Analysis
    print("\n=== RELEASE YEAR ANALYSIS ===")
    year_stats = analyzer.analyze_release_years()
    # Year 0 is the placeholder for a missing/unparseable year.
    valid_years = {k: v for k, v in year_stats.items() if k > 0}

    if valid_years:
        latest_year = max(valid_years)
        earliest_year = min(valid_years)
        print(f"Content spans from {earliest_year} to {latest_year}")

        # Recent content: the 10 calendar years ending at latest_year, i.e.
        # latest_year - 9 .. latest_year (a ">=" test would cover 11 years).
        recent_content = sum(count for year, count in valid_years.items()
                             if year > latest_year - 10)
        print(f"Recent content (last 10 years): {recent_content} shows")

    # Family-Friendly Content
    print("\n=== FAMILY-FRIENDLY CONTENT ===")
    family_content = analyzer.get_family_friendly_content()
    print(f"Family-friendly shows: {len(family_content)}")

    if family_content:
        print("\nSample family-friendly titles:")
        for show in family_content[:8]:  # Show first 8
            print(f"  • {show}")
        if len(family_content) > 8:
            print(f"  ... and {len(family_content) - 8} more")

    # Content by Decade
    print("\n=== CONTENT BY DECADE ===")
    decades = analyzer.get_content_by_decade()
    sorted_decades = sorted(decades.items(), key=lambda x: x[0])

    for decade, shows in sorted_decades:
        if shows:  # Only show decades with content
            print(f"{decade:<10} {len(shows):4d} shows")

    # Pandas-specific analysis
    analyzer.pandas_analysis()

    return analyzer, df_head

def filter_content_by_criteria(analyzer: NetflixAnalyzer,
                             min_year: int = 2015,
                             target_genres: List[str] = None,
                             family_friendly_only: bool = False) -> List[NetflixContent]:
    """Return the analyzer's shows that satisfy every requested criterion.

    A show is kept when its release year is at least ``min_year``, it is
    family-friendly if ``family_friendly_only`` is set, and (when
    ``target_genres`` is non-empty) it matches at least one of those genres.
    The original order of ``analyzer.shows`` is preserved.
    """
    wanted_genres = [] if target_genres is None else target_genres

    def passes(candidate) -> bool:
        # Too old for the requested window.
        if candidate.release_year < min_year:
            return False
        # Family filter requested but the rating does not qualify.
        if family_friendly_only and not candidate.is_family_friendly():
            return False
        # No genre filter means every remaining show qualifies.
        if not wanted_genres:
            return True
        return any(candidate.has_genre(name) for name in wanted_genres)

    return [candidate for candidate in analyzer.shows if passes(candidate)]

# Example driver: runs the full report and two filtering demos on the CSV.
# In a notebook kernel __name__ is "__main__", so this executes with the cell.
if __name__ == "__main__":
    # Location of the dataset when the file has been uploaded to Google Colab
    csv_file_path = '/content/netflix_titles.csv'

    print("Netflix Data Analysis")
    print("="*50)

    try:
        # Option 1: Direct analysis from CSV (loads the file and prints the report)
        analyzer, df_head = analyze_netflix_data_from_csv(csv_file_path)

        print("\n" + "="*50)
        print("=== ADVANCED FILTERING EXAMPLES ===")

        # Example 1: Recent family-friendly content
        print("\n1. Recent Family-Friendly Content (2015+):")
        family_recent = filter_content_by_criteria(
            analyzer,
            min_year=2015,
            family_friendly_only=True
        )
        print(f"Found {len(family_recent)} family-friendly shows from 2015+")
        # Only the first five matches are listed to keep the output short
        for show in family_recent[:5]:
            print(f"   {show}")

        # Example 2: Comedy content
        # min_year is left at its default, so this too is limited to 2015+
        print("\n2. Comedy Content:")
        comedy_shows = filter_content_by_criteria(
            analyzer,
            target_genres=["Comedy", "Comedies"]
        )
        print(f"Found {len(comedy_shows)} comedy shows")
        for show in comedy_shows[:5]:
            print(f"   {show}")

        print(f"\nAnalysis complete! Processed {len(analyzer.shows)} shows.")

    except FileNotFoundError:
        # Only a missing file is handled here; any other error propagates
        print(f"CSV file not found at {csv_file_path}")
        print("Please ensure the file path is correct.")

        # Alternative: Use with pandas DataFrame directly
        print("\nAlternatively, you can use it with an existing DataFrame:")
        print("analyzer = NetflixAnalyzer()")
        print("analyzer.load_data_from_dataframe(your_dataframe)")

Netflix Data Analysis
Loaded 8807 records from /content/netflix_titles.csv

Dataset Info:
Columns: ['show_id', 'type', 'title', 'director', 'cast', 'country', 'date_added', 'release_year', 'rating', 'duration', 'listed_in', 'description']
Shape: (8807, 12)
=== PANDAS DATAFRAME SUMMARY ===

Dataset Shape: (8807, 12)

Column Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8807 entries, 0 to 8806
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   show_id       8807 non-null   object
 1   type          8807 non-null   object
 2   title         8807 non-null   object
 3   director      6173 non-null   object
 4   cast          7982 non-null   object
 5   country       7976 non-null   object
 6   date_added    8797 non-null   object
 7   release_year  8807 non-null   int64 
 8   rating        8803 non-null   object
 9   duration      8804 non-null   object
 10  listed_in     8807 non-null   object
 11  description

In [None]:
# Assignment 1

In [12]:
# Question 1
first_name = input("Enter your first name: ")
age = input("Enter your age: ")

# Assemble the greeting from its fixed and user-supplied pieces.
message = "".join(["Hello ", first_name, ", you are ", age, " years old"])

# Repeat the newline-terminated message three times with string repetition.
print(3 * (message + "\n"))

Enter your first name: Owani
Enter your age: 19
Hello Owani, you are 19 years old
Hello Owani, you are 19 years old
Hello Owani, you are 19 years old



In [13]:
# Question 2
num1 = int(input("Enter the first integer: "))
num2 = int(input("Enter the second integer: "))

# Report which number is larger, handling the tie first.
if num1 == num2:
    print("Both numbers are equal")
elif num1 > num2:
    print(f"{num1} is greater than {num2}")
else:
    print(f"{num2} is greater than {num1}")

# Parity check: a single odd number is enough to rule out "both even".
if num1 % 2 != 0 or num2 % 2 != 0:
    print("At least one is odd")
else:
    print("Both are even")

Enter the first integer: 1
Enter the second integer: 2
2 is greater than 1
At least one is odd


In [14]:
# Question 3
def analyze_list(L):
    """Return ``(sum, maximum, minimum)`` of the numbers in ``L``.

    Args:
        L: any iterable of numbers (list, tuple, generator, ...).

    Returns:
        A 3-tuple ``(total, maximum, minimum)``.

    Raises:
        ValueError: if ``L`` contains no elements.
    """
    # Materialise once: a one-shot iterable would be exhausted by sum() and
    # leave nothing for max()/min().
    values = list(L)
    if not values:
        raise ValueError("analyze_list() requires at least one number")
    return (sum(values), max(values), min(values))
# Prompt for five integers (numbered 1-5) and gather them in order.
numbers = [int(input(f"Enter number {i+1}: ")) for i in range(5)]

# results is the (sum, max, min) tuple produced by analyze_list.
results = analyze_list(numbers)
print(f"Sum: {results[0]}, Max: {results[1]}, Min: {results[2]}")

Enter number 1: 1
Enter number 2: 2
Enter number 3: 3
Enter number 4: 4
Enter number 5: 5
Sum: 15, Max: 5, Min: 1


In [15]:
# Question 4: Functions and Abstraction
def is_prime(n):
    """Return True if ``n`` is a prime number, otherwise False.

    Values below 2 and values with a fractional part (e.g. ``2.5``) are never
    prime. Whole-valued floats such as ``7.0`` are treated like the integer.
    """
    if n < 2:
        return False
    if n != int(n):  # fractional input cannot be prime
        return False
    n = int(n)
    if n < 4:  # 2 and 3
        return True
    if n % 2 == 0:
        return False
    # Trial division by odd numbers only. Comparing divisor*divisor with n
    # keeps the bound in exact integer arithmetic instead of a float sqrt.
    divisor = 3
    while divisor * divisor <= n:
        if n % divisor == 0:
            return False
        divisor += 2
    return True

# Keep only the numbers from 1 to 50 (inclusive) that pass the primality test.
primes = list(filter(is_prime, range(1, 51)))

print("Prime numbers between 1 and 50:")
print(primes)

Prime numbers between 1 and 50:
[2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47]


In [16]:
# Question 5: Object-Oriented Programming
import math

class Coordinate:
    """A point in the 2-D plane with ``x`` and ``y`` coordinates."""

    def __init__(self, x, y):
        self.x = x
        self.y = y

    def distance(self, other):
        """Return the Euclidean distance between this point and ``other``.

        ``other`` may be any object exposing numeric ``x`` and ``y``.
        """
        # hypot avoids the overflow that squaring very large deltas causes.
        return math.hypot(self.x - other.x, self.y - other.y)

# Demo: measure the distance from (3, 4) to the origin.
point1 = Coordinate(x=3, y=4)
point2 = Coordinate(x=0, y=0)

print(f"Distance between points: {point1.distance(point2)}")

Distance between points: 5.0
