# Data Types for Data Science

## Fundamental data types

### Manipulating lists

In [3]:
# Create a list containing the names: baby_names
baby_names = ['Ximena','Aliza','Ayden','Calvin']

# Extend baby_names with 'Rowen' and 'Sandeep'
baby_names.extend(['Rowen','Sandeep'])

# Print baby_names
print(baby_names)

# Find the position of 'Aliza': position
position = baby_names.index('Aliza')

# Remove 'Aliza' from baby_names
baby_names.pop(position)

# Print baby_names
print(baby_names)

['Ximena', 'Aliza', 'Ayden', 'Calvin', 'Rowen', 'Sandeep']
['Ximena', 'Ayden', 'Calvin', 'Rowen', 'Sandeep']


### Looping over lists

In [8]:
records = [['2011', 'FEMALE', 'HISPANIC', 'GERALDINE', '13', '75'], ['2011', 'FEMALE', 'HISPANIC', 'GIA', '21', '67'], ['2011', 'FEMALE', 'HISPANIC', 'GIANNA', '49', '42'], ['2011', 'FEMALE', 'HISPANIC', 'GISELLE', '38', '51'], ['2011', 'FEMALE', 'HISPANIC', 'GRACE', '36', '53'], ['2011', 'FEMALE', 'HISPANIC', 'GUADALUPE', '26', '62'], ['2011', 'FEMALE', 'HISPANIC', 'HAILEY', '126', '8'], ['2011', 'FEMALE', 'HISPANIC', 'HALEY', '14', '74'], ['2011', 'FEMALE', 'HISPANIC', 'HANNAH', '17', '71']]

# Create the empty list: baby_names
baby_names = []

# Loop over records 
for row in records:
    # Add the name to the list
    baby_names.append(row[3])
    
# Sort the names in alphabetical order
for name in sorted(baby_names):
    # Print each name
    print(name)

GERALDINE
GIA
GIANNA
GISELLE
GRACE
GUADALUPE
HAILEY
HALEY
HANNAH


### Tuple

### Using and unpacking tuples

In [10]:
girl_names = ['Chloe', 'SOPHIA', 'LONDON', 'TAYLOR', 'Ava', 'MAKAYLA', 'RACHEL','GABRIELLE','LEAH', 'Taylor', 'Brielle', 'AVA', 'MADISON']
boy_names = ['Amir', 'Jack', 'Ryan', 'Jacob', 'Benjamin', 'DAVID', 'MATTHEW', 'Elijah', 'DANIEL', 'JOSIAH', 'Daniel', 'Eric', 'Mason']
# Pair up the boy and girl names: pairs
pairs = zip(girl_names, boy_names)

# Iterate over pairs
for idx, pair in enumerate(pairs):
    # Unpack pair: girl_name, boy_name
    girl_name, boy_name = pair
    # Print the rank and names associated with each rank
    print('Rank {}: {} and {}'.format(idx, girl_name, boy_name))

Rank 0: Chloe and Amir
Rank 1: SOPHIA and Jack
Rank 2: LONDON and Ryan
Rank 3: TAYLOR and Jacob
Rank 4: Ava and Benjamin
Rank 5: MAKAYLA and DAVID
Rank 6: RACHEL and MATTHEW
Rank 7: GABRIELLE and Elijah
Rank 8: LEAH and DANIEL
Rank 9: Taylor and JOSIAH
Rank 10: Brielle and Daniel
Rank 11: AVA and Eric
Rank 12: MADISON and Mason


### Making tuples by accident

In [11]:
# Create the normal variable: normal
normal = 'simple'

# Create the mistaken variable: error
error = 'trailing comma',

# Print the types of the variables
print(type(normal))
print(type(error))

<type 'str'>
<type 'tuple'>


### Finding all the data and the overlapping data between sets

In [13]:
baby_names_2011 = {'Jazmine', 'Tiana', 'Jordyn', 'Malka', 'Sabrina', 'Eliza', 'Shawn', 'Aileen', 'Javier', 'Heidy', 'Jencarlos', 'Jesse', 'Mina'}
baby_names_2014 =  {'Yidel', 'Yitzchok', 'Roizy', 'Jaden', 'Kareem', 'Emanuel', 'Bridget', 'Jesus', 'Mirel', 'Hindy', 'Maggie', 'Kimora', 'Nolan', 'Yael', 'Philip', 'Prince', 'Shlome', 'Justin'}
# Find the union: all_names
all_names = baby_names_2011.union(baby_names_2014)

# Print the count of names in all_names
print(len(all_names))

# Find the intersection: overlapping_names
overlapping_names = baby_names_2011.intersection(baby_names_2014)

# Print the count of names in overlapping_names
print(len(overlapping_names))

31
0


### Determining set differences

In [14]:
# Create the empty set: baby_names_2011
baby_names_2011 = set()

# Loop over records and add the names from 2011 to the baby_names_2011 set
for row in records:
    # Check if the first column is '2011'
    if row[0] == '2011':
        # Add the fourth column to the set
        baby_names_2011.add(row[3])

# Find the difference between 2011 and 2014: differences
differences = baby_names_2011.difference(baby_names_2014)

# Print the differences
print(differences)

set(['HANNAH', 'GERALDINE', 'HALEY', 'HAILEY', 'GISELLE', 'GIANNA', 'GUADALUPE', 'GIA', 'GRACE'])


### Creating and looping through dictionaries

In [18]:
female_baby_names_2012 = {('MADISON', 1), ('CYNTHIA', 36), ('SARAH', 12), ('CHAVY', 79), ('SUMMER', 31), ('SERENITY', 36), ('MICHELLE', 43), ('AMINA', 77), ('MELINA', 77), ('LAILA', 74), ('SLOANE', 74)}
# Create an empty dictionary: names
names = {}

# Loop over the girl names
for name, rank in female_baby_names_2012:
    # Add each name to the names dictionary using rank as the key
    
    names[rank] = name
    
# Sort the names list by rank in descending order and slice the first 10 items
for rank in sorted(names, reverse=True)[:10]:
    # Print each item
    print(names[rank])

CHAVY
AMINA
LAILA
MICHELLE
SERENITY
SUMMER
SARAH
MADISON


### Safely finding by key

In [22]:
names = {1: 'EMMA', 2: 'LEAH', 3: 'SARAH', 4: 'SOPHIA', 5: 'ESTHER', 6: 'RACHEL', 7: 'CHAYA', 8: 'AVA', 9: 'CHANA'}

# Safely print rank 7 from the names dictionary
print(names.get(7))

# Safely print the type of rank 100 from the names dictionary
print(type(names.get(2)))

# Safely print rank 105 from the names dictionary or 'Not Found'
print(names.get(105, 'Not Found'))

CHAYA
<type 'str'>
Not Found


### Adding and extending dictionaries

In [43]:
names_2011 = {1: 'Michael', 2: 'Joseph', 3: 'Jacob', 4: 'David', 5: 'Benjamin', 6: 'Moshe', 7: 'Daniel', 8: 'Alexander}
boy_names = {2012: {}, 2013: {1: 'David',  2: 'Joseph',  3: 'Michael',  4: 'Moshe',  5: 'Daniel',  6: 'Benjamin',  7: 'James}}

# Assign the names_2011 dictionary as the value to the 2011 key of boy_names
boy_names[2011] = names_2011

# Update the 2012 key in the boy_names dictionary
boy_names[2012].update([(1,'Casey'), (2,'Aiden')])

# Loop over the boy_names dictionary 
for year in boy_names:
    # Loop over and sort the data for each year by descending rank
    for rank in sorted(boy_names[year], reverse=True)[:1]:
        # Check that you have a rank
        if not rank:
            print(year, 'No Data Available')
        # Safely print the year and the least popular name or 'Not Available'
        print(year, boy_names[year].get(rank, 'Not Available'))

SyntaxError: EOL while scanning string literal (<ipython-input-43-9b19e9bc66fd>, line 1)

### Popping and deleting from dictionaries

In [39]:
female_names = {2011: {1: 'Olivia',  2: 'Esther',  3: 'Rachel',  4: 'Leah',  5: 'Emma',  6: 'Chaya',  7: 'Sarah',  8: 'Sophia',  9: 'Ava',  10: 'Miriam'}, 2012: {}, 2013: {1: 'Olivia',  2: 'Emma',  3: 'Esther',  4: 'Sophia',  5: 'Sarah',  6: 'Leah',  7: 'Rachel',  8: 'Chaya',  9: 'Miriam',  10: 'Chana'}, 2014: {1: 'Olivia',  2: 'Esther',  3: 'Rachel',  4: 'Leah',  5: 'Emma',  6: 'Chaya',  7: 'Sarah',  8: 'Sophia',  9: 'Ava', 10: 'Miriam'}}

In [41]:
# Remove 2011 and store it: female_names_2011
female_names_2011 = female_names.pop(2011)

# Safely remove 2015 with an empty dictionary as the default: female_names_2015
female_names_2015 = female_names.pop(2015,{})

# Delete 2012
del female_names[2012]

# Print female_names
print(female_names)

{2013: {1: 'Olivia', 2: 'Emma', 3: 'Esther', 4: 'Sophia', 5: 'Sarah', 6: 'Leah', 7: 'Rachel', 8: 'Chaya', 9: 'Miriam', 10: 'Chana'}, 2014: {1: 'Olivia', 2: 'Esther', 3: 'Rachel', 4: 'Leah', 5: 'Emma', 6: 'Chaya', 7: 'Sarah', 8: 'Sophia', 9: 'Ava', 10: 'Miriam'}}
