# Intermediate Python

In [1]:
from csv import reader

# Load the artworks CSV into `art`, a list of rows (each row is a list of strings).
# The `with` block closes the file even if reading fails part-way through, and
# newline='' lets the csv reader handle line endings inside quoted fields itself.
file = 'Data/artworks.csv'
with open(file, encoding='utf-8', newline='') as open_file:
    read_file = reader(open_file)
    art = list(read_file)

In [2]:
# Total number of rows read from the CSV; this count still includes the header row.
len(art)

16726

In [3]:
# Separate the header row (first row) from the data rows (everything after it).
headers, moma = art[0], art[1:]

In [4]:
# Display the column names.
headers

['Title',
 'Artist',
 'Nationality',
 'BeginDate',
 'EndDate',
 'Gender',
 'Date',
 'Department']

## Data Description
* Title: The title of the artwork.
* Artist: The name of the artist who created the artwork.
* Nationality: The nationality of the artist.
* BeginDate: The year in which the artist was born.
* EndDate: The year in which the artist died.
* Gender: The gender of the artist.
* Date: The date that the artwork was created.
* Department: The department inside MoMA to which the artwork belongs.

### Using String Replacement
Remove '(' and ')' from the gender and nationality columns

In [5]:
# Inspect the first data row before any cleaning.
moma[0]

['Dress MacLeod from Tartan Sets',
 'Sarah Charlesworth',
 '(American)',
 '(1947)',
 '(2013)',
 '(Female)',
 '1986',
 'Prints & Illustrated Books']

In [6]:
# Remove '(' and ')' from Nationality (index 2) and Gender (index 5) in every row.
for row in moma:
    for column in (2, 5):
        row[column] = row[column].replace('(', '').replace(')', '')

In [7]:
# Same row after removing the parentheses from Nationality and Gender.
moma[0]

['Dress MacLeod from Tartan Sets',
 'Sarah Charlesworth',
 'American',
 '(1947)',
 '(2013)',
 'Female',
 '1986',
 'Prints & Illustrated Books']

### Data Cleaning

In [8]:
# Standardize Gender (index 5) and Nationality (index 2): title-case each value
# and substitute a placeholder label when the value is empty.
placeholders = ((5, 'Gender Unknown/Other'), (2, 'Nationality Unknown'))
for row in moma:
    for column, placeholder in placeholders:
        value = row[column].title()
        row[column] = value if value else placeholder

In [9]:
# Same row after title-casing Gender and Nationality and filling empty values.
moma[0]

['Dress MacLeod from Tartan Sets',
 'Sarah Charlesworth',
 'American',
 '(1947)',
 '(2013)',
 'Female',
 '1986',
 'Prints & Illustrated Books']

### Clean and Convert Start/End Dates (Artist's birthdate and deathdate)

In [10]:
def clean_and_convert(date):
    """
    Convert a year string such as "(1947)" to an int.

    Parameters
    ----------
    date : str or any
        A year string, optionally containing '(' and ')'. An empty string
        means the value is missing. Non-string values are treated as
        already converted.

    Returns
    -------
    int, or the input unchanged
        The year as an int; the empty string is returned as-is, and so is
        any value that is not a string.

    Raises
    ------
    ValueError
        If a non-empty string is not a valid integer once the parentheses
        are removed.
    """
    # A value that is not a string has already been converted (for example when
    # the calling cell is run a second time), so hand it back untouched instead
    # of failing on `.replace`.
    if not isinstance(date, str):
        return date
    # Leave missing values as the empty string.
    if date == "":
        return date
    return int(date.replace("(", "").replace(")", ""))

# Replace BeginDate (index 3) and EndDate (index 4) in every row with the
# cleaned values; both are computed before either is written back.
for row in moma:
    row[3], row[4] = clean_and_convert(row[3]), clean_and_convert(row[4])

### Cleaning the "Date" Column
Example Data:
* 1912
* 1929
* 1913-1923
* (1951)
* 1994
* 1934
* c. 1915
* 1995
* c. 1912
* (1988)
* 2002
* 1957-1959
* c. 1955.
* c. 1970's
* C. 1990-1999

#### Prototyping

In [11]:
# Sample inputs for prototyping, and the characters to remove from them.
strings = ["good!", "morn?ing", "good?!", "morniZZZZng"]
bad_chars = ["!", "?", "Z"]

In [12]:
def strip_chars(string, chars=None):
    """
    Return `string` with every occurrence of each unwanted character removed.

    Parameters
    ----------
    string : str
        The text to clean.
    chars : iterable of str, optional
        The substrings to remove. When omitted, the module-level `bad_chars`
        is used, looked up at call time.

    Returns
    -------
    str
        The cleaned text.
    """
    # `bad_chars` is rebound later in this notebook, so the default depends on
    # which cells have run; pass `chars` explicitly to avoid that dependency.
    if chars is None:
        chars = bad_chars
    for char in chars:
        string = string.replace(char, "")
    return string

In [13]:
# Clean every sample string and collect the results in order.
cleaned_strings = [strip_chars(text) for text in strings]

In [14]:
# Display the cleaned sample strings.
cleaned_strings

['good', 'morning', 'good', 'morning']

#### Application for `Date` Column

In [15]:
# Representative raw values from the Date column.
test_data = ["1912", "1929", "1913-1923",
             "(1951)", "1994", "1934",
             "c. 1915", "1995", "c. 1912",
             "(1988)", "2002", "1957-1959",
             "c. 1955.", "c. 1970's",
             "C. 1990-1999"]

# Characters to remove from a date string; '-' is kept so year ranges survive.
bad_chars = ["(",")","c","C",".","s","'", " "]

def strip_characters(string, chars=None):
    """
    Return `string` with every occurrence of each unwanted character removed.

    Parameters
    ----------
    string : str
        The raw date text.
    chars : iterable of str, optional
        The substrings to remove. When omitted, the module-level `bad_chars`
        is used, looked up at call time.

    Returns
    -------
    str
        The cleaned text.
    """
    # Resolve the default at call time so existing one-argument calls keep
    # following whatever `bad_chars` currently holds.
    if chars is None:
        chars = bad_chars
    for char in chars:
        string = string.replace(char, '')
    return string

stripped_test_data = [strip_characters(date) for date in test_data]

print(stripped_test_data)

['1912', '1929', '1913-1923', '1951', '1994', '1934', '1915', '1995', '1912', '1988', '2002', '1957-1959', '1955', '1970', '1990-1999']


### Further `Date` Cleaning
Some values are still year ranges. Because we only need an approximate age for each artist, exact years are not essential to our analysis. Here's how we'll proceed:
* Where there is a single year, we'll keep it.
* Where there is a year range, we'll average the two years.

In [16]:
def process_date(string):
    """
    Convert a cleaned date string to a single int year.

    A string without '-' is converted directly with int(). A string
    containing '-' is split on it, and the rounded average of the first two
    parts is returned.
    """
    if '-' not in string:
        return int(string)
    parts = string.split('-')
    first_year, second_year = int(parts[0]), int(parts[1])
    return round((first_year + second_year) / 2)

In [17]:
# Check the single-year path: the string has no '-', so it is converted with int().
test_range = '1990'
process_date(test_range)

1990

In [18]:
# Convert every stripped test string to an int year, preserving order.
processed_test_data = [process_date(year) for year in stripped_test_data]

In [19]:
# One processed value was appended per input string, so this is the number of test strings.
len(processed_test_data)

15

In [20]:
# NOTE(review): this prints the unprocessed strings again; `processed_test_data`
# may have been the intended argument here — confirm.
print(stripped_test_data)

['1912', '1929', '1913-1923', '1951', '1994', '1934', '1915', '1995', '1912', '1988', '2002', '1957-1959', '1955', '1970', '1990-1999']


### Use New Processing Function on MoMA Dataset

In [21]:
# List each column name alongside its index to look up the position of 'Date'.
print(list(enumerate(headers)))

[(0, 'Title'), (1, 'Artist'), (2, 'Nationality'), (3, 'BeginDate'), (4, 'EndDate'), (5, 'Gender'), (6, 'Date'), (7, 'Department')]


In [22]:
# Convert the Date column (index 6) of every record to an int year, in place.
for record in moma:
    date = record[6]
    # Only strings still need cleaning. Values that are already converted are
    # skipped, so running this cell a second time does not fail on
    # `int.replace`.
    if isinstance(date, str):
        record[6] = process_date(strip_characters(date))

In [23]:
# Quick verification
for record in moma[:15]:
    print(record[6])

1986
1978
1900
1934
1903
1957
1924
1980
2001
1941
1950
1963
1910
1934
1997


### Formatting Strings

#### `str.format()` Method

In [24]:
# Fill the two positional placeholders with the artist's name and birth year.
artist, birth_year = "Pablo Picasso", 1881

sentence_pattern = "{}'s birth year is {}"
template = sentence_pattern.format(artist, birth_year)
print(template)

Pablo Picasso's birth year is 1881


### Use `str.format()` to Loop Through a List

In [25]:
# Country name paired with its population in millions.
pop_millions = [
    ["China", 1379.302771],
    ["India", 1281.935991],
    ["USA", 326.625791],
    ["Indonesia", 260.580739],
    ["Brazil", 207.353391],
]

# {:,.2f} formats the number with a thousands separator and two decimal places.
template = 'The population of {} is {:,.2f} million'

for name, population in pop_millions:
    print(template.format(name, population))

The population of China is 1,379.30 million
The population of India is 1,281.94 million
The population of USA is 326.63 million
The population of Indonesia is 260.58 million
The population of Brazil is 207.35 million


## Object-Oriented Python

### Classes
#### Revisiting the `type()` Function

In [26]:
# One example object for each of four built-in types.
l = [1, 2, 3]
s = "string"
d = {"a": 1, "b": 2}
my_set = {2, 3, 5}

# Print the class of each object, in the order defined above.
for example in (l, s, d, my_set):
    print(type(example))

<class 'list'>
<class 'str'>
<class 'dict'>
<class 'set'>


#### Sets

In [27]:
# Two sets written with the same elements in a different order.
order_1 = {0, 2, 3, 1}
order_2 = {1, 3, 2, 0}

for ordering in (order_1, order_2):
    print(ordering)

# Set equality compares elements only, not the order they were written in.
sets_match = order_1 == order_2
print("order_1 is equal to order_2: {ans}".format(ans=sets_match))

{0, 1, 2, 3}
{0, 1, 2, 3}
order_1 is equal to order_2: True


In [28]:
# Build a set incrementally: start from an empty set, then add one element.
# NOTE(review): the set is named `even` but 17 is odd — confirm whether the
# name or the value is what was intended.
even = set()
even.add(17)
print(even)

{17}


In [29]:
tri_num_sequence = [1, 3, 6, 10, 15, 10, 6, 3, 1]
odd_numbers = [1, 3, 5, 7, 9, 11, 13, 15, 17, 19]

# trinum_5: the distinct elements of tri_num_sequence.
trinum_5 = set(tri_num_sequence)

# odd_20: the positive odd numbers smaller than 20, taken from odd_numbers.
# (A set comprehension over range(0, 20) filtered on num % 2 != 0 is an
# alternative way to build it.)
odd_20 = set(odd_numbers)

# odd_trinum: the elements of trinum_5 that are also in odd_20.
odd_trinum = trinum_5 & odd_20

print(odd_trinum)

{1, 3, 15}


### Classes and Methods

In [30]:
class NewList():
    # NOTE: first_method is declared without a `self` parameter. Calling it on
    # an instance in the next cell raises the TypeError shown in that cell's
    # output, which the following section goes on to explain.
    def first_method():
        print('This is my first method')

newlist = NewList()

In [31]:
# Raises the TypeError shown below: the call supplies one positional argument,
# but first_method was defined to take none.
newlist.first_method()

TypeError: first_method() takes 0 positional arguments but 1 was given

**What is causing this error?**

In [None]:
# Call the method through the class, passing the string explicitly as the argument.
s = "MY STRING"

result = str.title(s)
print(result)

So, `instance.method_name()` is equivalent to `ClassName.method_name(instance)`.

In [None]:
class MyClass():
    """Minimal class whose only method prints the instance it is called on."""
    def print_self(self):
        """Print `self`, the object this method was called on."""
        print(self)
        
mc = MyClass()

In [None]:
# Print the instance directly, for comparison with the method call in the next cell.
print(mc)

In [None]:
# Print the instance from inside its own method, via `self`.
mc.print_self()

The same output was displayed both when we printed the object with `print(mc)` and when we printed it from inside the method with `mc.print_self()`, which shows that this "phantom" argument is the object itself.

In [None]:
class NewList:
    """Example class with one method that accepts `self` and returns a message."""

    def first_method(self):
        """Return a fixed message string."""
        message = "This is my first method"
        return message


newlist = NewList()
result = newlist.first_method()

print(result)

In [None]:
class NewList:
    """Example class with a method that takes an argument in addition to `self`."""

    def return_list(self, input_list):
        """Return `input_list` unchanged (the same object that was passed in)."""
        return input_list


newlist = NewList()

sample_values = [1, 2, 3]
result = newlist.return_list(sample_values)

print(result)

In [None]:
class NewList():
    """Example class that stores the value passed to the constructor in `data`."""
    def __init__(self, initial_state):
        # Attribute assignment 'data':
        self.data = initial_state

    def append(self, arg):
        """Placeholder that does nothing yet; the next cell implements it."""
        # A `def` with no body is an IndentationError, so the stub needs an
        # explicit statement for this cell to run.
        pass


my_list = NewList([1,2,3,4,5])

print(my_list.data)

**Creating Our Own `append()` method:**

In [None]:
class NewList():
    """
    A Python list with some extras!
    """
    def __init__(self, initial_state):
        """Store a copy of the items of `initial_state` in `data`."""
        # Copy the input: `append` extends `data` in place with `+=`, which
        # would otherwise also modify the list object the caller passed in.
        self.data = list(initial_state)

    def append(self, arg):
        """Add `arg` to the end of `data`."""
        self.data += [arg]

my_list = NewList([1,2,3,4,5])
print(my_list.data)

my_list.append(6)
print(my_list.data)

**Creating Helper Methods**

In [None]:
class NewList:
    """
    A Python list with some extras!
    """
    def __init__(self, initial_state):
        """Store `initial_state` in `data` and record its length."""
        self.data = initial_state
        self.calc_length()

    def calc_length(self):
        """Count the items in `data` one by one and store the total in `length`."""
        self.length = sum(1 for _ in self.data)

    def append(self, new_item):
        """
        Append `new_item` to the NewList
        """
        extended = self.data + [new_item]
        self.data = extended
        # Keep `length` in step with the new contents.
        self.calc_length()


fibonacci = NewList([1, 1, 2, 3, 5])
print(fibonacci.length)

fibonacci.append(8)
print(fibonacci.length)

### Business Application - Propeller

In [41]:
class Propeller():
    """
    A propeller record with its dimensions, condition and location.

    Parameters
    ----------
    diameter, pitch
        Stored as given on the instance.
    condition : str, optional
        Initial condition; defaults to 'Repaired'.
    location : str, optional
        Initial location; defaults to 'Norfolk Yard'.
    """
    def __init__(self, diameter, pitch, condition='Repaired', location='Norfolk Yard'):
        self.diameter = diameter
        self.pitch = pitch
        # The defaults reproduce the values that used to be hard-coded here, so
        # two-argument calls behave exactly as before.
        self.condition = condition
        self.location = location

    def damage(self):
        """Mark the propeller as damaged."""
        self.condition = 'Damaged'

In [44]:
# Create a propeller with diameter 28 and pitch 30.
wcp_201142 = Propeller(28, 30)

In [45]:
# Condition assigned by the constructor.
wcp_201142.condition

'Repaired'

In [46]:
# Set the propeller's condition to 'Damaged'.
wcp_201142.damage()

In [47]:
# Condition after calling damage().
wcp_201142.condition

'Damaged'