In [6]:
import statistics
import csv
import numpy as np
import numpy.random as rand
import math
from collections import defaultdict

In [7]:
#Task 1
class Person():
    age="int"
    height="int"
    weight="int"
    sex="string, male/female"
    hobbies="list of strings"
    def __init__(self,age,height,weight,sex,hobbies):
        """
        This is the initial function of the person object
        Args:
        self--this indicates the current person object
        age(int)--this represents the age attribute of the current person object
        height(int)--this represents the height attribute of the current person object
        weight(int)--this represents the weight attribute of the current person object
        sex(string)--this represents the gender attribute of the current person object, options: "m" or "f"
        hobbies(list[str])--this represents the hobbies attribute of the current person object, which is a list of strings
    Returns:
        None
    """
        self.age=age
        self.height=height
        self.weight=weight
        self.sex=sex
        self.hobbies=hobbies
    def repr(self):
        """
        Returns a string containing the person's age, height, weight, sex and hobbies
        Args:
        self--this indicates the current person object
        Returns:
        a string
        """
        return (f'{self.age},{self.height},{self.weight},{self.sex},{self.hobbies}')
person1=Person(12,187,55,"f",["Reading","TV","Fishing","Video Games"])
print(person1.repr())

12,187,55,f,['Reading', 'TV', 'Fishing', 'Video Games']


In [8]:
#Task 2
class Population():
    pop="a list of people in the population"
    def __init__(self,pop):
        """
        Initialize the population
        Args:
        self--indicates the current Population object
        pop--the list of Person objects that is used to initialize the Population object
        Returns:
        None
        """
        self.pop=pop
    def add_person(self,person):
        """
        return the population after adding a new person
        Args:
        self--indicates the current Population object
        person--the new person object that is going to be added to the population
        Returns:
        The new population object
        """
        self.pop.append(person)
        return self.pop
    def add_people(self,people):
        """
        Returns the population after adding a new population
        Args:
        self--indicates the current Population object
        people--the list of Person objects that will be added to the current population
        Returns:
        The new population object
        """
        self.pop.extend(people)
        return self.pop
    def top_hobby(self,age,fixed_range):
        """
        Return the number one hobby in the population
        Args:
        self--indicates the current Population object
        age--indicates the set age for the function as a filter
        fixed_range--indicates the range of the filter age
        Returns:
        the most common hobby based on the age range
        """
        new_map={}
        top_hobby=""
        frequency=0
        for person in self.pop:
            if person.age<=age+fixed_range & person.age>=age-fixed_range:
               for hobby in person.hobbies:
                   if hobby not in new_map.items():
                        new_map[hobby]=1
                   else:
                        new_map[hobby]=new_map[hobby]+1
        for key in new_map:
            if new_map[key]>frequency:
                frequency=new_map[key]
                top_hobby=key
        return top_hobby
    def filter_population(self, m_or_f, height_min, height_max, weight_min, weight_max):
        """
        According to the filters in the args, returns the new population
        Args:
        self--indicates the current Population object
        m_or_f--indicates the filter gender for the new population--can be "m" or "f" or "both"
        height_min--indicates the minimum height for the new population
        height_max--indicates the maximum height for the new population
        weight_min--indicates the minimum weight for the new population
        weight_max--indicates the maximum weight for the new population
        Returns:
        the new population based on the filters as a new list
        """
        new_list=[]
        for person in self.pop:
            if m_or_f=="both" | person.sex==m_or_f:
                if person.height>=height_min & person.height<=height_max:
                    if person.weight>=weight_min & person.weight<=weight_max:
                        new_list.append(person)
        return new_list
    def save_population(self):
        """
        save the population to a csv file
        Args:
        self--indicates the current Population object
        Returns:
        None
        """
        #Writing to CSV Files
        file_path = "population_csv.csv"
        with open(file_path, 'w', newline="") as csv_file:
             csv_writer = csv.writer(csv_file, quotechar='|', lineterminator='\n')
             field_names = ['Age', 'Weight', 'Height','Sex','Hobbies']
             csv_writer.writerow(field_names) # writerow, like write, lets us write a single record into the csv
             for person in self.pop:
                 data=[]
                 data.append(person.age)
                 data.append(person.weight)
                 data.append(person.height)
                 data.append(person.sex)
                 data.append(person.hobbies)
                 csv_writer.writerows(data) # writerows, like writelines, takes an iterable and writes each to a line
    
    def stats(self):
        """
        calculate some stats of the population
        Args:
        self--indicates the current Population object
        Returns:
        pop_stats as a dictionary containing the name of the stats as keys and the values of the stats as values
        """
        pop_stats={}
        age_list=[]
        height_list=[]
        weight_list=[]
        avg_age=0
        avg_height=0
        avg_weight=0
        median_age=0
        median_height=0
        median_weight=0
        std_age=0
        std_height=0
        std_weight=0
        for person in self.pop:
            age_list.append(person.age)
            height_list.append(person.height)
            weight_list.append(person.weight)
        avg_age=sum(age_list)/len(age_list)
        avg_height=sum(height_list)/len(height_list)
        avg_weight=sum(weight_list)/len(weight_list)
        median_age=statistics.median(age_list)
        median_height=statistics.median(height_list)
        median_weight=statistics.median(weight_list)
        std_age=statistics.stdev(age_list)
        std_height=statistics.stdev(height_list)
        std_weight=statistics.stdev(weight_list)
        pop_stats["avg_age"]=avg_age
        pop_stats["avg_height"]=avg_height
        pop_stats["avg_weight"]=avg_weight
        pop_stats["median_age"]=median_age
        pop_stats["median_height"]=median_height
        pop_stats["median_weight"]=median_weight
        pop_stats["std_age"]=std_age
        pop_stats["std_height"]=std_height
        pop_stats["std_weight"]=std_weight
        return pop_stats
    def repr(self):
        return self.pop
    

In [9]:
#Task 3
"""
Task 3 requires us to create of a population consisting of 100 Person objects based on the following distributions:
age = normal distribution with mean at 50 and std at 20
height_male = normal distribution with mean at 70 and std at 4
height_female = normal distribution with mean at 65 and std at 3.5
weight_male = 10/3 * height - 58 + np.random.normal(scale=7)
weight_female = 5/2 * height - 15 + np.random.normal(scale=7)
sex = binomial distribution with p(male/female) equals 0.5
hobbies = random selection (1-5 hobbies) from the hobby_list list
"""
hobby_list=["Reading","TV","Fishing","Video Games","Gardening","Exercising","Listening to Music","Golf","Board Games","Tennis","Painting","Guitar","Violin","Writing","Swimming","Running","Cooking","Drawing","Basketball","Skiing","Photography"]
pop=[]
np.random.seed(10)
for x in range(101):
    personal_age=math.floor(rand.normal(loc=50, scale=20))
    personal_sex=math.floor(rand.binomial(100,0.5))
    if personal_sex==1:
        height_male=math.floor(rand.normal(70,4))
        weight_male=math.floor(10/3*height_male-58+np.random.normal(scale=7))
    else:
        height_female=math.floor(rand.normal(65,3.5))
        weight_female=math.floor(5/2*height_female-15+np.random.normal(scale=7))
    personal_hobbies=[]
    num1=rand.randint(1,6)
    for x in range(num1):
        num2=rand.randint(0,21)
        personal_hobbies.append(hobby_list[num2])
        if personal_sex==1:
            person=Person(personal_age,height_male,weight_male,"m",personal_hobbies)
        else:
            person=Person(personal_age,height_female,weight_female,"f",personal_hobbies)
        pop.append(person)
pop=Population(pop)

In [None]:
#Task 4
def top_hobby(self,age,fixed_range):
        """
        Return the number one hobby in the population
        Args:
        self--indicates the current Population object
        age--indicates the set age for the function as a filter
        fixed_range--indicates the range of the filter age
        Returns:
        the most common hobby based on the age range
        """
        new_map={}
        top_hobby=""
        frequency=0
        for person in self.pop:
            if person.age<=age+fixed_range & person.age>=age-fixed_range:
               for hobby in person.hobbies:
                   if hobby in new_map.items():
                        new_map[hobby]=new_map[hobby]+1
                        
                   else:
                        new_map[hobby]=1
        for key in new_map:
            if new_map[key]>frequency:
                frequency=new_map[key]
                top_hobby=key
        return top_hobby

In [None]:
#Task 5
def filter_population(self, m_or_f, height_min, height_max, weight_min, weight_max):
        """
        According to the filters in the args, returns the new population
        Args:
        self--indicates the current Population object
        m_or_f--indicates the filter gender for the new population--can be "m" or "f" or "both"
        height_min--indicates the minimum height for the new population
        height_max--indicates the maximum height for the new population
        weight_min--indicates the minimum weight for the new population
        weight_max--indicates the maximum weight for the new population
        Returns:
        the new population based on the filters as a new list
        """
        new_list=[]
        for person in self.pop:
            if m_or_f=="both" | person.sex==m_or_f:
                if person.height>=height_min & person.height<=height_max:
                    if person.weight>=weight_min & person.weight<=weight_max:
                        new_list.append(person)
        return new_list

In [None]:
#Task 6
def save_population(self):
        """
        save the population to a csv file
        Args:
        self--indicates the current Population object
        Returns:
        None
        """
        #Writing to CSV Files
        file_path = "population_csv.csv"
        with open(file_path, 'w', newline="") as csv_file:
             csv_writer = csv.writer(csv_file, quotechar='|', lineterminator='\n')
             field_names = ['Age', 'Weight', 'Height','Sex','Hobbies']
             csv_writer.writerow(field_names) # writerow, like write, lets us write a single record into the csv
             for person in self.pop:
                 data=[]
                 data.append(person.age)
                 data.append(person.weight)
                 data.append(person.height)
                 data.append(person.sex)
                 data.append(person.hobbies)
                 csv_writer.writerows(data) # writerows, like writelines, takes an iterable and writes each to a line

In [None]:
#Task 7
def stats(self):
        """
        calculate some stats of the population
        Args:
        self--indicates the current Population object
        Returns:
        pop_stats as a dictionary containing the name of the stats as keys and the values of the stats as values
        """
        pop_stats={}
        age_list=[]
        height_list=[]
        weight_list=[]
        avg_age=0
        avg_height=0
        avg_weight=0
        median_age=0
        median_height=0
        median_weight=0
        std_age=0
        std_height=0
        std_weight=0
        for person in self.pop:
            age_list.append(person.age)
            height_list.append(person.height)
            weight_list.append(person.weight)
        avg_age=sum(age_list)/len(age_list)
        avg_height=sum(height_list)/len(height_list)
        avg_weight=sum(weight_list)/len(weight_list)
        median_age=statistics.median(age_list)
        median_height=statistics.median(height_list)
        median_weight=statistics.median(weight_list)
        std_age=statistics.stdev(age_list)
        std_height=statistics.stdev(height_list)
        std_weight=statistics.stdev(weight_list)
        pop_stats["avg_age"]=avg_age
        pop_stats["avg_height"]=avg_height
        pop_stats["avg_weight"]=avg_weight
        pop_stats["median_age"]=median_age
        pop_stats["median_height"]=median_height
        pop_stats["median_weight"]=median_weight
        pop_stats["std_age"]=std_age
        pop_stats["std_height"]=std_height
        pop_stats["std_weight"]=std_weight
        return pop_stats

In [13]:
#Task 8
"""
Task 8 requires us to create of a population consisting of 1000 Person objects based on the following distributions:
age = normal distribution with mean at 50 and std at 20
height_male = normal distribution with mean at 70 and std at 4
height_female = normal distribution with mean at 65 and std at 3.5
weight_male = 10/3 * height - 58 + np.random.normal(scale=7)
weight_female = 5/2 * height - 15 + np.random.normal(scale=7)
sex = binomial distribution with p(male/female) equals 0.5
hobbies = random selection (1-5 hobbies) from the hobby_list list
Then it requires us to save the stats of the 100-people population and the 1000-people population to a file and compare them
"""
pop2=[]
np.random.seed(10)
for x in range(1001):
    personal_age=math.floor(rand.normal(loc=50, scale=20))
    personal_sex=math.floor(rand.binomial(100,0.5))
    if personal_sex==1:
        height_male=math.floor(rand.normal(70,4))
        weight_male=math.floor(10/3*height_male-58+np.random.normal(scale=7))
    else:
        height_female=math.floor(rand.normal(65,3.5))
        weight_female=math.floor(5/2*height_female-15+np.random.normal(scale=7))
    personal_hobbies=[]
    num1=rand.randint(1,6)
    for x in range(num1):
        num2=rand.randint(0,21)
        personal_hobbies.append(hobby_list[num2])
        if personal_sex==1:
            person=Person(personal_age,height_male,weight_male,"m",personal_hobbies)
        else:
            person=Person(personal_age,height_female,weight_female,"f",personal_hobbies)
        pop2.append(person)
pop2=Population(pop2)
pop_stats=pop.stats()
pop2_stats=pop2.stats()
print(pop_stats)
print(pop2_stats)
file_path = "pop_stats_csv.csv"
with open(file_path, 'w', newline="") as csv_file:
    # Sometimes we need to tweak the newline for formatting
    csv_writer = csv.writer(csv_file, quotechar='|', lineterminator='\n')
    #t he writer() method takes similar parameters as reader(), but sets up a means to write data
    field_names = ['AVG_Age', 'AVG_Height', 'AVG_Weight', 'Median_Age','Median_Height','Median_Weight','STD_Age','STD_Height','STD_Weight']
    csv_writer.writerow(field_names) # writerow, like write, lets us write a single record into the csv
    field_values=[str(pop_stats['avg_age']),str(pop_stats['avg_height']),str(pop_stats['avg_weight']),str(pop_stats['median_age']),str(pop_stats['median_height']),str(pop_stats['median_weight']),str(pop_stats['std_age']),str(pop_stats['std_height']),str(pop_stats['std_weight'])]
    field_values2=[str(pop2_stats['avg_age']),str(pop2_stats['avg_height']),str(pop2_stats['avg_weight']),str(pop2_stats['median_age']),str(pop2_stats['median_height']),str(pop2_stats['median_weight']),str(pop2_stats['std_age']),str(pop2_stats['std_height']),str(pop2_stats['std_weight'])]
    csv_writer.writerow(field_values)
    csv_writer.writerow(field_values2)
"""
The results would be considered expected
"""

{'avg_age': 47.186440677966104, 'avg_height': 64.81016949152543, 'avg_weight': 146.01694915254237, 'median_age': 47, 'median_height': 64, 'median_weight': 145, 'std_age': 20.11808217712152, 'std_height': 3.2739798180083413, 'std_weight': 10.984979734906993}
{'avg_age': 50.10806289729006, 'avg_height': 64.50284376045501, 'avg_weight': 145.9391100702576, 'median_age': 50, 'median_height': 64, 'median_weight': 146, 'std_age': 20.713640873451705, 'std_height': 3.4618414165467852, 'std_weight': 11.301853842565407}


In [66]:
#Task 9
"""
Task9 requires us to read from the csv file and do some stats analysis and check if data_set 1-4 and base data
come from the same population
"""
file_path = "dataset.csv"
base_list=[]
new_base_list=[]
list1=[]
new_list1=[]
list2=[]
new_list2=[]
list3=[]
new_list3=[]
list4=[]
new_list4=[]
with open(file_path, newline='') as csvfile:
     spamreader = csv.reader(csvfile, delimiter=',', quotechar='|')
     for row in spamreader:
         #print(row[0])
         base_list.append(row[0])
         list1.append(row[1])
         list2.append(row[2])
         list3.append(row[3])
         list4.append(row[4])
#print(list)
for i in base_list[1:]:
    i=float(i)
    new_base_list.append(i)
#print(new_list)
avg_base=sum(new_base_list)/len(new_base_list)
median_base=statistics.median(new_base_list)
std_base=statistics.stdev(new_base_list)
print("the mean for base list is: "+str(avg_base))
print("the median for base list is: "+str(median_base))
print("the standard deviation for base list is: "+str(std_base))
for i in list1[1:]:
    i=float(i)
    new_list1.append(i)
#print(new_list1)
avg_list1=sum(new_list1)/len(new_list1)
median_list1=statistics.median(new_list1)
std_list1=statistics.stdev(new_list1)
print("the mean for list 1 is: "+str(avg_list1))
print("the median for list 1 is: "+str(median_list1))
print("the standard deviation for list 1 is: "+str(std_list1))
for i in list2[1:]:
    i=float(i)
    new_list2.append(i)
#print(new_list2)
avg_list2=sum(new_list2)/len(new_list2)
median_list2=statistics.median(new_list2)
std_list2=statistics.stdev(new_list2)
print("the mean for list 2 is: "+str(avg_list2))
print("the median for list 2 is: "+str(median_list2))
print("the standard deviation for list 2 is: "+str(std_list2))
for i in list3[1:]:
    i=float(i)
    new_list3.append(i)
#print(new_list3)
avg_list3=sum(new_list3)/len(new_list3)
median_list3=statistics.median(new_list3)
std_list3=statistics.stdev(new_list3)
print("the mean for list 3 is: "+str(avg_list3))
print("the median for list 3 is: "+str(median_list3))
print("the standard deviation for list 3 is: "+str(std_list3))
for i in list4[1:]:
    i=float(i)
    new_list4.append(i)
#print(new_list4)
avg_list4=sum(new_list4)/len(new_list4)
median_list4=statistics.median(new_list4)
std_list4=statistics.stdev(new_list4)
print("the mean for list 4 is: "+str(avg_list4))
print("the median for list 4 is: "+str(median_list4))
print("the standard deviation for list 4 is: "+str(std_list4))
"""
My conclusion is that from the results of the distribution of base data,data_set_1,
data_set_2,data_set_3,data_set_4, data_set 1-3 seem to belong to the same population
as base data. However, data_set4 doesn't belong to the same population as the base data
"""

the mean for base list is: 0.5006045994549996
the median for base list is: 0.5075008115
the standard deviation for base list is: 0.2885006324437237
the mean for list 1 is: 0.5005030197410001
the median for list 1 is: 0.5089525525
the standard deviation for list 1 is: 0.2935030379632535
the mean for list 2 is: 0.5006441007760001
the median for list 2 is: 0.5080393625
the standard deviation for list 2 is: 0.29350237827898434
the mean for list 3 is: 0.504497773051
the median for list 3 is: 0.5070618655
the standard deviation for list 3 is: 0.2934224397133783
the mean for list 4 is: 1.0605858480129993
the median for list 4 is: 0.7628797890000001
the standard deviation for list 4 is: 1.0145139964379368
