In [1]:
from csv import reader

# Load `artworks_clean.csv` into a list of rows (each row is a list of strings).
# The `with` block guarantees the file handle is closed once the rows have been
# read, and newline='' is what the csv module asks for so that quoted fields
# containing line breaks are parsed correctly.
with open('artworks_clean.csv', encoding='utf-8', newline='') as opened_file:
    read_file = reader(opened_file)
    moma = list(read_file)

# Drop the first row (presumably the column header) so `moma` holds data rows only.
moma = moma[1:]

In [2]:
# Cast the birth date (column 3) and then the death date (column 4) to int,
# one full column at a time. Empty strings are written back unchanged.
for year_col in (3, 4):
    for record in moma:
        raw_year = record[year_col]
        record[year_col] = raw_year if raw_year == "" else int(raw_year)

In [3]:
# Cast the `date` field (column 6) to int wherever it is not an empty string.
for record in moma:
    raw_date = record[6]
    record[6] = raw_date if raw_date == "" else int(raw_date)

In [4]:
# Age of the artist for each artwork: the artwork's date (column 6) minus the
# artist's birth date (column 3). 0 is stored when the age cannot be computed.
ages = []

for row in moma:
    date = row[6]
    birth = row[3]
    # Either field may still be "" after the int conversion (empty values are
    # left as strings). Subtracting with "" would raise TypeError, so both
    # fields must be present before computing the difference.
    if birth != "" and date != "":
        age = date - birth
    else:
        age = 0
    ages.append(age)

In [5]:
# Keep ages above 20 as they are; every age of 20 or below (including the 0
# placeholder) is replaced by the string 'Unknown'.
final_ages = [
    computed_age if computed_age > 20 else 'Unknown'
    for computed_age in ages
]

In [6]:
# Turn each known age into a decade label; 'Unknown' entries pass through as-is.
decades = []

for value in final_ages:
    if value == 'Unknown':
        label = value
    else:
        # Swap the last digit for "0s", e.g. 47 -> "47" -> "4" -> "40s".
        label = str(value)[:-1] + '0s'
    decades.append(label)

In [7]:
# Count how many times each decade label occurs in `decades`.
decade_frequency = {}

for label in decades:
    # .get() supplies 0 the first time a label is seen.
    decade_frequency[label] = decade_frequency.get(label, 0) + 1

## The .format method for string output

### .format method demo

```python
artist = "Pablo Picasso"
birth_year = 1881
```

#### basic format
```python
output = "{} was born in {}.".format(artist, birth_year)
print(output)
```

#### index format
```python
output = "{0} was born in {1}. {1} is a year in the 1800s.".format(artist, birth_year)
print(output)
```

#### keyword format
```python
output = "{name} was born in {year}. {year} is a year in the 1800s.".format(name=artist, year=birth_year)
print(output)
```

The next cell builds a frequency table that maps each artist to the number of their artworks in the dataset.

In [8]:
# Tally the number of rows per artist; the artist is read from column 1.
artist_freq = {}

for record in moma:
    artist_name = record[1]
    try:
        artist_freq[artist_name] += 1
    except KeyError:
        # First artwork seen for this artist.
        artist_freq[artist_name] = 1

print(artist_freq)

{'Sarah Charlesworth': 1, 'Pablo Palazuelo': 4, 'Maurice Denis': 71, 'Aristide Maillol': 77, 'Eugène Atget': 705, 'Antonio Frasconi': 41, 'Garry Winogrand': 47, 'Diane Victor': 4, 'David Brown Milne': 2, 'Jean Dubuffet': 206, 'Jim Dine': 57, 'František Kupka': 37, 'Franklin Chenault Watkins': 4, 'Christopher Wool': 19, 'Abraham Walkowitz': 19, 'Pierre Alechinsky': 67, 'Frank Stella': 17, 'Frank Lloyd Wright': 112, 'Vicente Rojo': 5, 'Ludwig Mies van der Rohe': 318, 'Varvara Stepanova': 6, 'Richard Serra': 4, 'Robert Filliou': 15, 'Roger Chancel': 3, 'Pierre Bonnard': 129, 'Jacqueline Poncelet': 1, 'Émile Bernard': 83, 'Georg Baselitz': 14, 'Frans Masereel': 34, 'Unknown': 448, 'Sol LeWitt': 89, 'James Tenney': 1, 'Claes Oldenburg': 12, 'Dieter Roth': 18, 'Moisei Fradkin': 1, 'Richard Lindner': 1, 'Wojciech Prazmowski': 2, 'Thomas Bewick': 49, 'Spencer Sweeney': 2, 'Batiste Madalena': 5, 'On Kawara': 9, 'Andy Warhol': 41, 'Lee Friedlander': 180, 'Joan Miró': 78, 'Marc Chagall': 173, 'Ro

The next cell defines a function that summarizes an artist's work by reporting
how many of that artist's artworks appear in the dataset.

In [9]:
def artist_summary(artist):
    """Print how many artworks by `artist` appear in the dataset.

    The count is looked up in the global `artist_freq` table. An artist who
    is not in the table is reported as having 0 artworks instead of raising
    KeyError.

    Args:
        artist: Artist name, used as the key into `artist_freq`.

    Returns:
        None. The summary sentence is written to standard output.
    """
    num_artworks = artist_freq.get(artist, 0)
    template = "There are {num} artworks by {name} in the dataset"
    output = template.format(name=artist, num=num_artworks)
    print(output)

artist_summary("Henri Matisse")

There are 129 artworks by Henri Matisse in the dataset


### Other .format() method uses:

In [10]:
# Each entry is a two-item list: [country name, population in millions].
pop_millions = [
    list(entry)
    for entry in (
        ("China", 1379.302771),
        ("India", 1281.935991),
        ("USA", 326.625791),
        ("Indonesia", 260.580739),
        ("Brazil", 207.353391),
    )
]

# Name stored in the first entry.
print(pop_millions[0][0])

China


In [11]:
# Unformatted: the population is printed with all of its decimals.
plain_template = "{} has a population of {} million people."
print(plain_template.format(pop_millions[0][0], pop_millions[0][1]))

# Same sentence, but the `:.2f` format spec rounds the population to two decimals.
two_decimal_template = "{} has a population of {pop:.2f} million people."
print(two_decimal_template.format(pop_millions[0][0], pop=pop_millions[0][1]))

China has a population of 1379.302771 million people.
China has a population of 1379.30 million people.


In [12]:
India = 1281935991

# Default formatting prints the integer with no separators.
plain_template = 'India has a population of {}'
print(plain_template.format(India))

# The `,` format spec inserts a comma as the thousands separator.
comma_template = 'India has a population of {0:,}'
print(comma_template.format(India))

India has a population of 1281935991
India has a population of 1,281,935,991


In [13]:
# `{:,.2f}` combines a thousands separator with rounding to two decimals.
template = "The population of {} is {:,.2f} million"

for entry in pop_millions:
    country, pop = entry[0], entry[1]
    print(template.format(country, pop))

The population of China is 1,379.30 million
The population of India is 1,281.94 million
The population of USA is 326.63 million
The population of Indonesia is 260.58 million
The population of Brazil is 207.35 million


### The dict.items() method

In [17]:
fruit_freq = dict(orange=4, banana=4, apple=2)

# .items() yields one (key, value) pair per dictionary entry.
for pair in fruit_freq.items():
    fruit, qty = pair
    output = "I have {q} {f}s".format(q=qty, f=fruit)
    print(output)

I have 4 oranges
I have 4 bananas
I have 2 apples


# Working with the artworks dataset again

### Creating a gender frequency table

In [29]:
# Count artworks per artist gender; the gender is read from column 5.
gender_freq = {}

# `moma` already had its first row removed right after the file was loaded, so
# every remaining row is data. Slicing with moma[1:] here would silently drop
# the first artwork from the counts.
for row in moma:
    gender = row[5]
    # Rows with no gender recorded are left out of the table entirely.
    if gender == "":
        continue
    gender_freq[gender] = gender_freq.get(gender, 0) + 1

### Printing a string to display the number of artworks created by each gender

In [30]:
# `{a:,}` prints each count with a comma as the thousands separator.
for gender in gender_freq:
    artworks = gender_freq[gender]
    output = "There are {a:,} artworks by {g} artists".format(a=artworks, g=gender)
    print(output)

There are 13,491 artworks by Male artists
There are 2,442 artworks by Female artists
There are 791 artworks by Gender Unknown/Other artists
