## Titanic

In [1]:
import csv
from collections import defaultdict, Counter
import math

# CSV file loaded further down in this cell; a relative path, so it is
# resolved against the current working directory.
filename = "titanic.csv"

def is_float(val):
    """Report whether ``val`` can be converted with ``float()``.

    Args:
        val: Any object; in this notebook, a raw CSV cell (a string).

    Returns:
        True when ``float(val)`` succeeds, False when the conversion fails.
    """
    try:
        float(val)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures mean "not a float". A bare ``except`` would
        # also swallow KeyboardInterrupt/SystemExit and make the cell
        # impossible to interrupt cleanly.
        return False
    return True

def describe_column(values):
    """Summarise the float-convertible entries of a column.

    Entries that ``is_float`` rejects (for example empty strings) are ignored.

    Args:
        values: Iterable of raw cell values.

    Returns:
        A dict with keys ``count``, ``mean``, ``min``, ``max`` and ``std``.
        ``std`` is the population standard deviation (the squared deviations
        are divided by ``count``). When no entry is numeric, ``count`` is 0
        and every other statistic is None.
    """
    parsed = []
    for raw in values:
        if is_float(raw):
            parsed.append(float(raw))

    n = len(parsed)
    if n == 0:
        # Nothing to aggregate: report the count and leave the rest undefined.
        return {"count": n, "mean": None, "min": None, "max": None, "std": None}

    average = sum(parsed) / n
    variance = sum((x - average) ** 2 for x in parsed) / n
    return {
        "count": n,
        "mean": average,
        "min": min(parsed),
        "max": max(parsed),
        "std": math.sqrt(variance),
    }

def value_counts(values):
    """Tally how many times each distinct value occurs.

    Args:
        values: Iterable of hashable items.

    Returns:
        A ``collections.Counter`` mapping each item to its number of occurrences.
    """
    tally = Counter()
    tally.update(values)
    return tally

# Read the CSV into a column-oriented mapping: header name -> list of raw cell
# strings, in file order.
with open(filename, newline='', encoding="utf-8") as csv_file:
    data = defaultdict(list)
    for record in csv.DictReader(csv_file):
        for column, cell in record.items():
            data[column].append(cell)

# A column counts as numeric when every cell is either blank or parses as a
# float; every other column is reported as categorical.
print("=== Descriptive Statistics (Numeric Columns) ===")
for column, cells in data.items():
    has_text = any(not is_float(cell) and cell != '' for cell in cells)
    if has_text:
        continue
    print(f"{column}: {describe_column(cells)}")

print("\n=== Categorical Value Counts ===")
for column, cells in data.items():
    only_numbers_or_blanks = all(is_float(cell) or cell == '' for cell in cells)
    if only_numbers_or_blanks:
        continue
    # Show just the five most frequent values to keep the output short.
    top_five = value_counts(cells).most_common(5)
    print(f"{column}: {top_five}")


=== Descriptive Statistics (Numeric Columns) ===
PassengerId: {'count': 891, 'mean': 446.0, 'min': 1.0, 'max': 891.0, 'std': 257.20938292890224}
Survived: {'count': 891, 'mean': 0.3838383838383838, 'min': 0.0, 'max': 1.0, 'std': 0.48631931786709987}
Pclass: {'count': 891, 'mean': 2.308641975308642, 'min': 1.0, 'max': 3.0, 'std': 0.8356019334795166}
Age: {'count': 714, 'mean': 29.69911764705882, 'min': 0.42, 'max': 80.0, 'std': 14.516321150817316}
SibSp: {'count': 891, 'mean': 0.5230078563411896, 'min': 0.0, 'max': 8.0, 'std': 1.1021244350892878}
Parch: {'count': 891, 'mean': 0.38159371492704824, 'min': 0.0, 'max': 6.0, 'std': 0.8056047612452208}
Fare: {'count': 891, 'mean': 32.204207968574636, 'min': 0.0, 'max': 512.3292, 'std': 49.6655344447741}

=== Categorical Value Counts ===
Name: [('Braund, Mr. Owen Harris', 1), ('Cumings, Mrs. John Bradley (Florence Briggs Thayer)', 1), ('Heikkinen, Miss. Laina', 1), ('Futrelle, Mrs. Jacques Heath (Lily May Peel)', 1), ('Allen, Mr. William Henry

## Iris

In [1]:
import csv
from collections import Counter
from statistics import mean, stdev

# CSV file to summarise; a relative path, resolved against the working directory.
filepath = "Iris.csv"

# newline="" lets the csv module do its own newline handling, which is required
# for quoted fields that contain line breaks to be parsed correctly.
with open(filepath, "r", newline="", encoding="utf-8") as f:
    reader = csv.DictReader(f)
    # Materialise every row (one dict per record) while the file is still open.
    rows = list(reader)

# Per-column summary: numeric columns get count/mean/min/max/std, all other
# columns get the number of distinct values and the single most common one.
stats = {}
# An empty file yields no rows; fall back to "no columns" instead of letting
# rows[0] raise IndexError.
sample_row = rows[0] if rows else {}
numeric_cols = []

for col in sample_row.keys():
    # Ignore missing cells (any falsy value, e.g. empty strings).
    col_data = [row[col] for row in rows if row[col]]

    # A column is numeric only when every present value parses with float().
    # Checking all values (not just the first row) avoids misclassifying a
    # column whose first cell is blank, and parsing with float() keeps
    # negative numbers and scientific notation that a digit-pattern filter
    # would silently discard.
    numeric_values = []
    for cell in col_data:
        try:
            numeric_values.append(float(cell))
        except (TypeError, ValueError):
            numeric_values = []
            break

    if numeric_values:
        numeric_cols.append(col)
        stats[col] = {
            'count': len(numeric_values),
            'mean': mean(numeric_values),
            'min': min(numeric_values),
            'max': max(numeric_values),
            # stdev() is the sample standard deviation and needs >= 2 points.
            'std': stdev(numeric_values) if len(numeric_values) > 1 else 0.0,
        }
    else:
        freq = Counter(col_data)
        stats[col] = {
            'unique': len(freq),
            'most_common': freq.most_common(1)
        }

print(stats)


{'Id': {'count': 150, 'mean': 75.5, 'min': 1.0, 'max': 150.0, 'std': 43.445367992456916}, 'SepalLengthCm': {'count': 150, 'mean': 5.843333333333334, 'min': 4.3, 'max': 7.9, 'std': 0.828066127977863}, 'SepalWidthCm': {'count': 150, 'mean': 3.054, 'min': 2.0, 'max': 4.4, 'std': 0.43359431136217363}, 'PetalLengthCm': {'count': 150, 'mean': 3.7586666666666666, 'min': 1.0, 'max': 6.9, 'std': 1.7644204199522626}, 'PetalWidthCm': {'count': 150, 'mean': 1.1986666666666668, 'min': 0.1, 'max': 2.5, 'std': 0.7631607417008412}, 'Species': {'unique': 3, 'most_common': [('Iris-setosa', 50)]}}


## Netflix

In [2]:
import csv
from collections import Counter
from statistics import mean, stdev

# CSV file to summarise; a relative path, resolved against the working directory.
filepath = "netflix_titles.csv"

# newline="" lets the csv module do its own newline handling, which is required
# for quoted fields that contain line breaks to be parsed correctly.
with open(filepath, "r", newline="", encoding="utf-8") as f:
    reader = csv.DictReader(f)
    # Materialise every row (one dict per record) while the file is still open.
    rows = list(reader)

# Per-column summary: numeric columns get count/mean/min/max/std, all other
# columns get the number of distinct values and the single most common one.
stats = {}
# An empty file yields no rows; fall back to "no columns" instead of letting
# rows[0] raise IndexError.
sample_row = rows[0] if rows else {}
numeric_cols = []

for col in sample_row.keys():
    # Ignore missing cells (any falsy value, e.g. empty strings).
    col_data = [row[col] for row in rows if row[col]]

    # A column is numeric only when every present value parses with float().
    # Checking all values (not just the first row) avoids misclassifying a
    # column whose first cell is blank, and parsing with float() keeps
    # negative numbers and scientific notation that a digit-pattern filter
    # would silently discard.
    numeric_values = []
    for cell in col_data:
        try:
            numeric_values.append(float(cell))
        except (TypeError, ValueError):
            numeric_values = []
            break

    if numeric_values:
        numeric_cols.append(col)
        stats[col] = {
            'count': len(numeric_values),
            'mean': mean(numeric_values),
            'min': min(numeric_values),
            'max': max(numeric_values),
            # stdev() is the sample standard deviation and needs >= 2 points.
            'std': stdev(numeric_values) if len(numeric_values) > 1 else 0.0,
        }
    else:
        freq = Counter(col_data)
        stats[col] = {
            'unique': len(freq),
            'most_common': freq.most_common(1)
        }

print(stats)


{'show_id': {'unique': 8807, 'most_common': [('s1', 1)]}, 'type': {'unique': 2, 'most_common': [('Movie', 6131)]}, 'title': {'unique': 8807, 'most_common': [('Dick Johnson Is Dead', 1)]}, 'director': {'unique': 4528, 'most_common': [('Rajiv Chilaka', 19)]}, 'cast': {'unique': 7692, 'most_common': [('David Attenborough', 19)]}, 'country': {'unique': 748, 'most_common': [('United States', 2818)]}, 'date_added': {'unique': 1767, 'most_common': [('January 1, 2020', 109)]}, 'release_year': {'count': 8807, 'mean': 2014.1801975701146, 'min': 1925.0, 'max': 2021.0, 'std': 8.819312130833968}, 'rating': {'unique': 17, 'most_common': [('TV-MA', 3207)]}, 'duration': {'unique': 220, 'most_common': [('1 Season', 1793)]}, 'listed_in': {'unique': 514, 'most_common': [('Dramas, International Movies', 362)]}, 'description': {'unique': 8775, 'most_common': [('Paranormal activity at a lush, abandoned property alarms a group eager to redevelop the site, but the eerie events may not be as unearthly as they 