In [1]:
%load_ext autoreload
%autoreload 2

In [15]:
import pandas as pd

from collections import Counter, defaultdict, namedtuple
from dataclasses import dataclass

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import warnings
warnings.filterwarnings("ignore")

In [3]:
# Add the directory three levels above the working directory to the import path
# (presumably the repository root — confirm). This has to run before the
# `utils.paths` import below, otherwise that import cannot be resolved.
import sys
sys.path.append("../../../") 

from utils.paths import make_dir_line

# Identifiers handed to the project helper.
# NOTE(review): the meaning of modality 'c' is not visible in this notebook — confirm in utils/paths.
modality = 'c'
project = 'Data Types for Data Science in Python'
# `data` is whatever make_dir_line returns; it is called with a name below,
# so it is presumably a path-building callable for this modality/project — TODO confirm.
data = make_dir_line(modality, project)

# Presumably the location of the raw data for this project — TODO confirm.
raw = data('raw')

# 6.4.0 Advanced Data Types

## 6.4.2 Using Counter on lists

In [4]:
from info import penguins2 as penguins

In [5]:
# Tally how many penguins there are of each sex: penguins_sex_counts
# (a generator expression feeds Counter directly, no intermediate list)
penguins_sex_counts = Counter(penguin['Sex'] for penguin in penguins)

# Show the resulting tallies
print(penguins_sex_counts)

Counter({'MALE': 15, 'FEMALE': 5})


## 6.4.3 Finding most common elements

In [6]:
# Tally the penguins by species: penguins_species_counts
penguins_species_counts = Counter(penguin['Species'] for penguin in penguins)

# Show the three species with the highest counts
print(penguins_species_counts.most_common(3))

[('Chinstrap', 7), ('Adlie', 7), ('Gentoo', 6)]


## 6.4.5 Creating dictionaries of an unknown structure

In [7]:
from info import weight_log

In [8]:
# Start with no species recorded: female_penguin_weights
female_penguin_weights = {}

# Walk through every (species, sex, body_mass) record in weight_log
for species, sex, body_mass in weight_log:
    # setdefault creates the empty list the first time a species is seen and
    # returns the existing list afterwards; the (sex, body_mass) tuple is
    # then appended to that species' list
    female_penguin_weights.setdefault(species, []).append((sex, body_mass))

# Print the weights for 'Adlie'
print(female_penguin_weights['Adlie'])

[('FEMALE', 3450.0), ('FEMALE', 3550.0), ('FEMALE', 3175.0)]


## 6.4.6 Safely appending to a key's value list

In [9]:
from info import weight_log2 as weight_log

In [10]:
# defaultdict comes from the notebook's top import cell
# (from collections import Counter, defaultdict, namedtuple)

# Missing species keys start out as empty lists: male_penguin_weights
male_penguin_weights = defaultdict(list)

# Group every body_mass under its species; sex is unpacked but not used here
for species, sex, body_mass in weight_log:
    male_penguin_weights[species].append(body_mass)

# Print the first 2 (species, weights) items of male_penguin_weights
first_two_species = list(male_penguin_weights.items())[:2]
print(first_two_species)

[('Gentoo', [5500.0, 5800.0, 5400.0, 5250.0, 4925.0]), ('Chinstrap', [4300.0, 4100.0, 4800.0, 3950.0, 3800.0, 4050.0])]


## 6.4.8 Creating namedtuples for storing data

In [11]:
from info import weight_log3 as weight_log

In [12]:
# Import namedtuple from collections
from collections import namedtuple

# Record type with one named field per weight_log column: SpeciesDetails
SpeciesDetails = namedtuple('SpeciesDetails', 'species sex body_mass')

# Wrap every weight_log entry in a SpeciesDetails instance: labeled_entries
labeled_entries = [
    SpeciesDetails(species, sex, body_mass)
    for species, sex, body_mass in weight_log
]

# Preview the first five labeled entries
print(labeled_entries[:5])

[SpeciesDetails(species='Gentoo', sex='MALE', body_mass=5500.0), SpeciesDetails(species='Chinstrap', sex='MALE', body_mass=4300.0), SpeciesDetails(species='Adlie', sex='MALE', body_mass=3800.0), SpeciesDetails(species='Gentoo', sex='MALE', body_mass=5800.0), SpeciesDetails(species='Chinstrap', sex='MALE', body_mass=4100.0)]


## 6.4.9 Leveraging attributes on namedtuples

In [13]:
# Look only at the first twenty entries in labeled_entries
for entry in labeled_entries[:20]:
    # Skip every species other than Chinstrap
    if entry.species != 'Chinstrap':
        continue
    # Print the entry's sex and body_mass separated by a colon
    print(f'{entry.sex}:{entry.body_mass}')

MALE:4300.0
MALE:4100.0
MALE:4800.0
FEMALE:3800.0
MALE:3950.0
MALE:3800.0
MALE:4050.0


## 6.4.11 Creating a dataclass

In [16]:
# dataclass comes from the notebook's top import cell
# (from dataclasses import dataclass)

@dataclass
class WeightEntry:
    """A single penguin weight record.

    The generated ``__init__`` takes the fields positionally in declaration
    order: species, flipper_length, body_mass, sex.
    """

    # Define the fields on the class
    species: str
    # Annotated as float (which also admits ints): the weight logs printed
    # earlier in this notebook store measurements as floats, e.g. 5500.0
    flipper_length: float
    body_mass: float
    sex: str

    # Define a property that returns the body_mass / flipper_length
    @property
    def mass_to_flipper_length_ratio(self) -> float:
        """Return ``body_mass`` divided by ``flipper_length``.

        Raises:
            ZeroDivisionError: if ``flipper_length`` is 0.
        """
        return self.body_mass / self.flipper_length

## 6.4.12 Using dataclasses

In [18]:
from info import weight_log4 as weight_log

In [19]:
# Re-declares WeightEntry (also defined in section 6.4.11) with the same
# fields and property; whichever definition runs last is the one in effect.
@dataclass
class WeightEntry:
    """A single penguin weight record.

    The generated ``__init__`` takes the fields positionally in declaration
    order: species, flipper_length, body_mass, sex.
    """

    # Define the fields on the class
    species: str
    # Annotated as float (which also admits ints): the weight logs printed
    # earlier in this notebook store measurements as floats, e.g. 5500.0
    flipper_length: float
    body_mass: float
    sex: str

    @property
    def mass_to_flipper_length_ratio(self) -> float:
        """Return ``body_mass`` divided by ``flipper_length``.

        Raises:
            ZeroDivisionError: if ``flipper_length`` is 0.
        """
        return self.body_mass / self.flipper_length

In [20]:
# Build one WeightEntry per weight_log record: labeled_entries
#
# NOTE(review): the ratios recorded for this cell are plain numbers, which is
# only possible if the 4th element of each record is the flipper length and
# the 3rd is the body mass, i.e. records look like
# (species, sex, body_mass, flipper_length) — confirm against info.weight_log4.
# The unpacking names follow that order, and keyword arguments bind each value
# to the WeightEntry field of the same name rather than relying on the
# positional order of the dataclass fields.
labeled_entries = [
    WeightEntry(
        species=species,
        flipper_length=flipper_length,
        body_mass=body_mass,
        sex=sex,
    )
    for species, sex, body_mass, flipper_length in weight_log
]

# Print a list of the first 5 mass_to_flipper_length_ratio values
print([entry.mass_to_flipper_length_ratio for entry in labeled_entries[:5]])

[23.91304347826087, 25.32751091703057, 24.0, 23.972602739726028, 20.476190476190474]


In [14]:
# Prints a fixed marker string; presumably a manual "cell ran" check — confirm intent.
print('Ok_')

Ok_
