### 1. Creating a database

- create a database with objects containing name and age attributes

In [None]:
import random

class Person:
    """A record of the sample database holding a name and an age."""

    def __init__(self, name, age):
        # Both values are stored as received; no validation is performed.
        self.name, self.age = name, age

# Pools of first names and surnames that are paired at random below.
names = [
    "Mirta", "Bob", "Charlie", "David", "Leandro",
    "Frank", "Grace", "Helen", "William", "Jack",
    "Karen", "Emily", "Bruno", "Marcos", "Bernardo",
    "Paul", "Quincy", "Irineu", "Wagner", "Walter",
]

surnames = [
    "Smith", "Johnson", "Brown", "Taylor", "Lee",
    "Walker", "Harris", "Young", "King", "Wright",
    "Adams", "da Silva", "Nelson", "Hill", "Carter",
    "Mitchell", "Perez", "Roberts", "Evans", "Green",
]


def _random_person():
    """Return a Person with a random "<name> <surname>" and an age in 1..100."""
    # Draw order (first name, surname, age) matters for seeded reproducibility.
    first_name = random.choice(names)
    surname = random.choice(surnames)
    return Person(f"{first_name} {surname}", random.randint(1, 100))


# The "database" is a plain list of 50 randomly generated people.
db = [_random_person() for _ in range(50)]

for person in db:
    print(f"Name: {person.name}, Age: {person.age}")

### 2. Frequency Tables

- **Absolute Frequency (Fa)**:  
  This is simply the count of each individual value in your dataset.
  
- **Relative Frequency (Fr)**:  
  **Formula**:  
  $$
  Fr = \frac{Fa}{n}
  $$
  **Where**:
  - Fa - is the absolute frequency of a value.
  - n - is the total number of data points.

In [None]:
# The sample analysed in the rest of the notebook: the age of every person.
data = [entry.age for entry in db]

# Absolute frequency: occurrences of each distinct value, keyed in order of
# first appearance.
freq = {}
for value in data:
    freq[value] = freq.get(value, 0) + 1

# Sample size.
n = len(data)

# Relative frequency: each count expressed as a fraction of the sample size.
rel_freq = {age: count / n for age, count in freq.items()}

print("Absolute Frequency:", freq)
print("Relative Frequency:", rel_freq)

### 2. Class Intervals (Grouping)

- **Class Width**:  
  **Formula**:  
  $$
  \text{class width} = \frac{\max(\text{data}) - \min(\text{data})}{\sqrt{n}}
  $$
  **Where**:
  -  max(data) - is the maximum value in your dataset.
  -  min(data) - is the minimum value in your dataset.
  -  n - is the total number of data points.

In [None]:
import math

# Extremes of the sample; their difference is the range split into classes.
min_data, max_data = min(data), max(data)

def rounded_sqrt(num):
    """Return the square root of ``num`` rounded up to an integer.

    ``math.ceil`` leaves a whole-number root unchanged, so a perfect square
    yields its exact root and any other input yields the next integer above
    its root.
    """
    return math.ceil(math.sqrt(num))

# Number of classes: square root of the sample size, rounded up.
exact_class_count = math.sqrt(n)
num_classes = rounded_sqrt(n)
print("Number of class without round:", exact_class_count)
print("Number of class rounded:", num_classes)

# Class width: the data range divided by the number of classes, rounded up.
data_range = max_data - min_data
exact_class_width = data_range / num_classes
class_width = math.ceil(exact_class_width)
print("Class Width:", "(", max_data, "-", min_data, ")", "/", num_classes)
print("Class Width without round:", exact_class_width)
print("Class Width:", class_width)

### 2.1 Defining Class Limits

In [None]:
class Data_Class:
    """One class interval of a grouped frequency distribution.

    Stores the interval limits, their midpoint (``class_average``) and a
    counter (``absolute_frequency``) that starts at zero.
    """

    def __init__(self, lower_limit, upper_limit):
        self.lower_limit, self.upper_limit = lower_limit, upper_limit
        # Midpoint of the two limits.
        midpoint = (lower_limit + upper_limit) / 2
        self.class_average = midpoint
        # Nothing has been counted into this interval yet.
        self.absolute_frequency = 0

def create_data_classes(num_classes, class_width):
    """Build ``num_classes`` class intervals of width ``class_width``.

    Interval ``i`` (counting from 0) has lower limit
    ``min_data + i * class_width`` and upper limit ``lower + class_width``,
    where ``min_data`` is read from module scope.

    Returns:
        A list of ``Data_Class`` objects in increasing index order.
    """
    lower_limits = (min_data + index * class_width for index in range(num_classes))
    return [Data_Class(lower, lower + class_width) for lower in lower_limits]

def increment_frequency(data_classes, data):
    """Count how many values of ``data`` fall into each class interval.

    Every interval is half-open, ``lower_limit <= value < upper_limit``,
    except the last one, which also includes its upper limit. Without that
    exception a value equal to the final upper limit (the sample maximum
    whenever the range divides evenly by the class width, or every value
    when the width is 0) would not be counted in any class.

    Args:
        data_classes: Sequence of objects exposing ``lower_limit``,
            ``upper_limit`` and ``absolute_frequency``; their
            ``absolute_frequency`` is incremented in place.
        data: Iterable of values comparable with the class limits.

    Returns:
        None.
    """
    # Only the final interval is closed on the right.
    last_class = data_classes[-1] if data_classes else None
    for number in data:
        for data_class in data_classes:
            lower = data_class.lower_limit
            upper = data_class.upper_limit
            inside = lower <= number < upper
            on_final_edge = data_class is last_class and lower <= number == upper
            if inside or on_final_edge:
                data_class.absolute_frequency += 1

# Build the class intervals, then tally the sample into them.
data_classes = create_data_classes(num_classes, class_width)
increment_frequency(data_classes, data)

# Echo the grouping parameters (one per line), followed by one line per class.
print(num_classes, class_width, sep="\n")
for dc in data_classes:
    print(f"lower_limit: {dc.lower_limit}, upper_limit: {dc.upper_limit}, class_average: {dc.class_average}, absolute_frequency: {dc.absolute_frequency}")

### 3. Measures of Central Tendency

- **Mean (Arithmetic Mean)**:  
  **Formula**:  
  $$
  \text{Mean} = \frac{x_1 + x_2 + x_3 + \dots + x_n}{n}
  $$
  **Where**:
  - x1, x2, x3 - are the individual data points.
  - n - is the total number of data points.

In [None]:
# Arithmetic mean: total of all observations divided by the sample size.
data_total = sum(data)
mean = data_total / n
print("Mean:", data_total, "/", n)
print("Mean:", mean)

- **Class Weighted Mean**:  
  **Formula**:  
  $$
  \text{Weighted Mean} = \frac{\sum_{i=1}^{k} x_i \cdot p_i}{\sum_{i=1}^{k} p_i}
  $$
  Where:
  - xi - is the midpoint (class average) of class i.
  - pi - is the weight associated with class i (here, its absolute frequency).
  - k - is the number of classes.

In [None]:
# Class-weighted mean: each class midpoint weighted by the class's absolute
# frequency, divided by the total of the weights.
total_frequency = sum(dc.absolute_frequency for dc in data_classes)
weighted_sum = sum(dc.class_average * dc.absolute_frequency for dc in data_classes)
weighted_mean = weighted_sum / total_frequency

# Report the weighted figures themselves; the previous version printed the
# plain arithmetic mean (sum(data) / n) under the "Weighted Mean" label.
print("Weighted Mean:", weighted_sum, "/", total_frequency)
print("Weighted Mean:", weighted_mean)

- **Median**:  
  **For Even Count**:  
  $$
  \text{Median} = \frac{\text{sorted data}[(n / 2) + 1] + \text{sorted data}[n / 2]}{2}
  $$  
  **For Odd Count**:  
  $$
  \text{Median} = \text{sorted data}[(n + 1) / 2]
  $$
  Where:
  - Sorted data is the dataset arranged in ascending order, with positions counted from 1.
  - n - is the total number of data points.

In [None]:
# Work on an ascending copy so the original sample order is left untouched.
sorted_data = list(data)
sorted_data.sort()

print("Sorted Data:", sorted_data)

middle = n // 2
if n % 2 == 0:
    # Even count: the median is the average of the two central values.
    lower_middle, upper_middle = sorted_data[middle - 1], sorted_data[middle]
    median = (lower_middle + upper_middle) / 2
    print(lower_middle, " + ", upper_middle)
else:
    # Odd count: the median is the single central value.
    median = sorted_data[middle]


print("Median:", median)

- **Mode**:  
  The value that appears most frequently in the dataset.

In [None]:
# Highest absolute frequency reached by any value.
max_freq = max(freq.values())
# List of modes: every value that reaches that frequency (there may be ties).
mode = [value for value in freq if freq[value] == max_freq]
print("Mode(s):", mode)

### 5. Median for Grouped Data (Md)

- **Formula**:  
  $$
  Md = L_{\text{inf}} + \frac{\left(\frac{n}{2} - F_{\text{ac previous}}\right)}{f_{\text{i class}}} \cdot \text{class width}
  $$

  Where:
  - Linf - is the lower limit of the median class.
  - Fac previous - is the cumulative frequency up to the class before the median class.
  - fi - the absolute frequency of the median class.
  - class width - is the width of the class interval.
  - n - is the total number of data points.

In [None]:
# Walk the classes in order, accumulating their frequencies, until the running
# total reaches half of the sample size: that class is the median class.
cumulative_frequency = 0
median_class = None
n_by_2 = n / 2

for data_class in data_classes:
    cumulative_frequency += data_class.absolute_frequency
    if cumulative_frequency >= n_by_2:
        median_class = data_class
        break

# Fail with a clear message instead of an AttributeError on None when the
# class frequencies never add up to half of the sample.
if median_class is None:
    raise ValueError("no median class found: class frequencies never reach n / 2")

# Cumulative frequency of all classes before the median class.
cumulative_frequency_previous = cumulative_frequency - median_class.absolute_frequency

Linf = median_class.lower_limit
fi = median_class.absolute_frequency
Fac = cumulative_frequency_previous

# NOTE: this rebinds `median`, replacing any value assigned to it earlier.
median = Linf + ((n_by_2 - Fac) / fi) * class_width

print("Grouped Median:", median)

### 4. Measures of Dispersion

- **Variance (s²)**:  
  **Formula**:  
  $$
  \text{Variance} = \frac{\sum_{i=1}^{n} f_i \cdot (x_i - \text{mean})^2}{n}
  $$
  Where:
  - fi - is the frequency of the data point.
  - xi - is the data point.
  - mean - is the mean of the dataset.
  - n - is the total number of data points.
- **Standard Deviation (s)**:  
  **Formula**:  
  $$
  \text{Standard Deviation} = \sqrt{\text{Variance}}
  $$
  It is simply the square root of the variance.

In [None]:
# Squared distance of every observation from the mean.
squared_deviations = [(x - mean) ** 2 for x in data]
# Variance: the sum of squared deviations divided by n.
variance = sum(squared_deviations) / n
# Standard deviation: square root of the variance.
std_dev = math.sqrt(variance)

print("Variance:", variance)
print("Standard Deviation:", std_dev)

- **Coefficient of Variation (CV)**:  
  **Formula**:  
  $$
  CV = \frac{\text{Standard Deviation}}{\text{Mean}} \times 100
  $$
  This measures the relative dispersion in percentage terms.

In [None]:
# Standard deviation relative to the mean, expressed as a percentage.
relative_dispersion = std_dev / mean
cv = relative_dispersion * 100
print("Coefficient of Variation:", cv)