### 1. Creating a database

- create a database with objects containing name and age attributes

In [None]:
import random

class Person:
    """A record in the sample database: a name paired with an age."""

    def __init__(self, name, age):
        """Store the given ``name`` and ``age`` on the new instance."""
        self.name, self.age = name, age

# Pools of given names and surnames that are combined into random full names.
names = [
    "Mirta", "Bob", "Charlie", "David", "Leandro",
    "Frank", "Grace", "Helen", "William", "Jack",
    "Karen", "Emily", "Bruno", "Marcos", "Bernardo",
    "Paul", "Quincy", "Irineu", "Wagner", "Walter",
]

surnames = [
    "Smith", "Johnson", "Brown", "Taylor", "Lee",
    "Walker", "Harris", "Young", "King", "Wright",
    "Adams", "da Silva", "Nelson", "Hill", "Carter",
    "Mitchell", "Perez", "Roberts", "Evans", "Green",
]


def _random_person():
    """Return a Person with a random full name and a random age from 1 to 100.

    The draws happen in a fixed order: given name, surname, then age.
    """
    full_name = f"{random.choice(names)} {random.choice(surnames)}"
    return Person(full_name, random.randint(1, 100))


# The database: 50 randomly generated people (duplicate names are possible).
db = [_random_person() for _ in range(50)]

# Show every generated record.
for person in db:
    print(f"Name: {person.name}, Age: {person.age}")

### 2. Frequency Tables

- **Absolute Frequency (Fa)**:  
  This is simply the count of each individual value in your dataset.
  
- **Relative Frequency (Fr)**:  
  **Formula**:  
  $$
  Fr = \frac{Fa}{n}
  $$
  **Where**:
  - Fa is the absolute frequency of a value.
  - n is the total number of data points.

In [None]:
# Raw sample: the age of every person in the database.
data = [p.age for p in db]

# Absolute frequency (Fa): how many times each distinct age occurs.
freq = {}
for value in data:
    freq[value] = freq.get(value, 0) + 1

# Total number of observations.
n = len(data)

# Relative frequency (Fr = Fa / n) of each distinct age.
rel_freq = {age: count / n for age, count in freq.items()}

print("Absolute Frequency:", freq)
print("Relative Frequency:", rel_freq)

### 2.1 Class Intervals (Grouping)

- **Class Width**:  
  **Formula**:  
  $$
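  \text{class width} = \frac{\max(\text{data}) - \min(\text{data})}{\lceil \sqrt{n} \rceil}
  $$
  **Where**:
  - \( \max(\text{data}) \) is the maximum value in your dataset.
  - \( \min(\text{data}) \) is the minimum value in your dataset.
  - \( n \) is the total number of data points; \( \lceil \sqrt{n} \rceil \) is its square root rounded up to the next whole number, which is used as the number of classes.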

In [None]:
import math

# Smallest and largest observation in the sample.
min_data, max_data = min(data), max(data)

def rounded_sqrt(num):
    """Return the square root of ``num`` rounded up to the nearest integer.

    A perfect square yields its exact integer root; any other value yields
    the next integer above its square root.
    """
    # math.ceil maps an integer-valued float to that same integer, so a single
    # call covers both the exact and the inexact case.
    return math.ceil(math.sqrt(num))

# Number of classes: square root of the sample size, rounded up.
root = rounded_sqrt(n)

# Class width: the data range split evenly across the classes.
data_range = max_data - min_data
class_width = data_range / root
print("Class Width:", "(", max_data, "-", min_data, ")", "/", root)
print("Class Width:", class_width)

### 3. Measures of Central Tendency

- **Mean (Arithmetic Mean)**:  
  **Formula**:  
  $$
  \text{Mean} = \frac{x_1 + x_2 + x_3 + \dots + x_n}{n}
  $$
  **Where**:
  - \( x_1, x_2, \dots, x_n \) are the individual data points.
  - \( n \) is the total number of data points.

In [None]:
# Arithmetic mean: the total of all observations divided by their count.
data_total = sum(data)
mean = data_total / n
print("Mean:", data_total, "/", n)
print("Mean:", mean)

- **Weighted Mean**:  
  **Formula**:  
  $$
  \text{Weighted Mean} = \frac{\sum_{i=1}^{n} x_i \cdot p_i}{\sum_{i=1}^{n} p_i}
  $$
  Where:
  - \( x_i \) is each data point.
  - \( p_i \) is the weight associated with each data point.

In [None]:
# Weighted mean calculation (manual)

# One weight per data point. The visible notebook defines no weighting scheme,
# so every point gets the same weight of 1 (which makes the result equal to the
# arithmetic mean). Replace this list with real weights, in the same order as
# `data`, to get a genuinely weighted result.
weights = [1] * len(data)

# Numerator: each data point multiplied by its weight; denominator: total weight.
weighted_sum = sum(x * w for x, w in zip(data, weights))
weight_sum = sum(weights)
weighted_mean = weighted_sum / weight_sum
print("Weighted Mean:", weighted_mean)


- **Median**:  
  **For Even Count**:  
  $$
  \text{Median} = \frac{\text{sorted data}[n//2 - 1] + \text{sorted data}[n//2]}{2}
  $$  
  **For Odd Count**:  
  $$
  \text{Median} = \text{sorted data}[n//2]
  $$
  Where:
  - Sorted data is the dataset arranged in ascending order.
  - \( n \) is the total number of data points.

In [None]:
# Median calculation (manual)
sorted_data = sorted(data)

# Split n into the middle index and a parity flag in one step.
middle, is_odd = divmod(n, 2)
if is_odd:
    # Odd count: the single middle element is the median.
    median = sorted_data[middle]
else:
    # Even count: average the two elements around the middle.
    median = (sorted_data[middle - 1] + sorted_data[middle]) / 2

print("Median:", median)


- **Mode**:  
  The value that appears most frequently in the dataset.

In [None]:
# Mode calculation (manual)
# Pick the (value, count) pair with the largest count and keep its value; when
# several values share the top count, the one stored first in `freq` is chosen.
mode = max(freq.items(), key=lambda pair: pair[1])[0]
print("Mode:", mode)

### 3.1 Median for Grouped Data (Md)

- **Formula**:  
  $$
  Md = L_{\text{inf}} + \left(\frac{\frac{n}{2} - F_{\text{ac previous}}}{f_{\text{i class}}}\right) \cdot \text{class width}
  $$
  Where:
  - \( L_{\text{inf}} \) is the lower limit of the median class.
  - \( F_{\text{ac previous}} \) is the cumulative frequency up to the class before the median class.
  - \( f_{\text{i class}} \) is the absolute frequency of the median class.
  - \( \text{class width} \) is the width of the class interval.
  - \( n \) is the total number of data points.

In [None]:
# Median for Grouped Data (Md)

# Grouped data (class intervals and frequencies).
# The data range divided by the class width is a whole number of classes in
# exact arithmetic; round() keeps floating-point error from truncating the
# count and silently dropping the last class.
num_classes = round((max_data - min_data) / class_width)
class_intervals = [
    (min_data + i * class_width, min_data + (i + 1) * class_width)
    for i in range(num_classes)
]

# Every class is half-open [lower, upper) except the last one, which takes
# everything from its lower limit upward so that values equal to the maximum
# are counted instead of falling outside all classes.
last_class = num_classes - 1
frequencies = []
for i, (lower, upper) in enumerate(class_intervals):
    if i == last_class:
        count = sum(1 for age in data if age >= lower)
    else:
        count = sum(1 for age in data if lower <= age < upper)
    frequencies.append(count)

# Calculate cumulative frequencies (manual calculation)
cumulative_freq = []
cumulative_sum = 0
for f in frequencies:
    cumulative_sum += f
    cumulative_freq.append(cumulative_sum)

# Median calculation (Md for grouped data)
# Each data point lands in exactly one class, so the sample size is simply the
# number of data points.
n = len(data)

# The median class is the first class whose cumulative frequency reaches n / 2.
median_class_index = next(
    i for i, cumulative in enumerate(cumulative_freq) if cumulative >= n / 2
)

L_inf = class_intervals[median_class_index][0]  # lower limit of the median class
F_ac_previous = cumulative_freq[median_class_index - 1] if median_class_index > 0 else 0
f_class = frequencies[median_class_index]  # absolute frequency of the median class

# Interpolate inside the median class; all classes share the same class_width.
Md = L_inf + ((n / 2 - F_ac_previous) / f_class) * class_width
print("Median for Grouped Data (Md):", Md)


### 4. Measures of Dispersion

- **Variance (s²)**:  
  **Formula**:  
  $$
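  \text{Variance} = \frac{\sum_{i=1}^{k} f_i \cdot (x_i - \text{mean})^2}{n}
  $$
  Where:
  - \( f_i \) is the absolute frequency of the distinct value \( x_i \).
  - \( x_i \) is the \( i \)-th distinct value in the dataset, and \( k \) is the number of distinct values.
  - \( \text{mean} \) is the mean of the dataset.
  - \( n \) is the total number of data points (\( n = \sum_{i=1}^{k} f_i \)). Summing \( (x - \text{mean})^2 \) over every individual data point, repeats included, gives the same numerator.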

- **Standard Deviation (s)**:  
  **Formula**:  
  $$
  \text{Standard Deviation} = \sqrt{\text{Variance}}
  $$
  It is simply the square root of the variance.

In [None]:
# Variance and Standard Deviation (manual calculation)

# Population variance: the average squared deviation from the mean.
# The divisor is computed from the data directly so the result does not depend
# on the current value of the shared variable `n`, which other cells reassign.
variance = sum((x - mean) ** 2 for x in data) / len(data)

# Standard deviation: the square root of the variance.
std_dev = variance ** 0.5

print("Variance:", variance)
print("Standard Deviation:", std_dev)


- **Coefficient of Variation (CV)**:  
  **Formula**:  
  $$
  CV = \frac{\text{Standard Deviation}}{\text{Mean}} \times 100
  $$
  This measures the relative dispersion in percentage terms.

In [None]:
# Coefficient of Variation (manual calculation)
# Ratio of the standard deviation to the mean, expressed as a percentage.
relative_dispersion = std_dev / mean
cv = relative_dispersion * 100
print("Coefficient of Variation:", cv)