In [1]:
import pandas as pd
from datetime import date, timedelta
from sqlalchemy import create_engine

### Extract CSVs into DataFrames

In [2]:
# Location of the raw artists table, relative to the notebook's working directory
artists_file = "Resources/artists.csv"

# Load the CSV and preview the first five rows
artists_df = pd.read_csv(filepath_or_buffer=artists_file)
artists_df.head(5)

Unnamed: 0,Artist ID,Name,Nationality,Gender,Birth Year,Death Year
0,1,Robert Arneson,American,Male,1930.0,1992.0
1,2,Doroteo Arnaiz,Spanish,Male,1936.0,
2,3,Bill Arnold,American,Male,1941.0,
3,4,Charles Arnoldi,American,Male,1946.0,
4,5,Per Arnoldi,Danish,Male,1941.0,


In [3]:
# Location of the raw artworks table, relative to the notebook's working directory
artworks_file = "Resources/artworks.csv"

# Load the CSV and preview the first five rows
artworks_df = pd.read_csv(filepath_or_buffer=artworks_file)
artworks_df.head(5)

Unnamed: 0,Artwork ID,Title,Artist ID,Name,Date,Medium,Dimensions,Acquisition Date,Credit,Catalogue,...,Classification,Object Number,Diameter (cm),Circumference (cm),Height (cm),Length (cm),Width (cm),Depth (cm),Weight (kg),Duration (s)
0,2,"Ferdinandsbrücke Project, Vienna, Austria, Ele...",6210,Otto Wagner,1896,Ink and cut-and-pasted painted pages on paper,"19 1/8 x 66 1/2"" (48.6 x 168.9 cm)",1996-04-09,Fractional and promised gift of Jo Carole and ...,Y,...,Architecture,885.1996,,,48.6,,168.9,,,
1,3,"City of Music, National Superior Conservatory ...",7470,Christian de Portzamparc,1987,Paint and colored pencil on print,"16 x 11 3/4"" (40.6 x 29.8 cm)",1995-01-17,Gift of the architect in honor of Lily Auchinc...,Y,...,Architecture,1.1995,,,40.6401,,29.8451,,,
2,4,"Villa near Vienna Project, Outside Vienna, Aus...",7605,Emil Hoppe,1903,"Graphite, pen, color pencil, ink, and gouache ...","13 1/2 x 12 1/2"" (34.3 x 31.8 cm)",1997-01-15,Gift of Jo Carole and Ronald S. Lauder,Y,...,Architecture,1.1997,,,34.3,,31.8,,,
3,5,"The Manhattan Transcripts Project, New York, N...",7056,Bernard Tschumi,1980,Photographic reproduction with colored synthet...,"20 x 20"" (50.8 x 50.8 cm)",1995-01-17,Purchase and partial gift of the architect in ...,Y,...,Architecture,2.1995,,,50.8,,50.8,,,
4,6,"Villa, project, outside Vienna, Austria, Exter...",7605,Emil Hoppe,1903,"Graphite, color pencil, ink, and gouache on tr...","15 1/8 x 7 1/2"" (38.4 x 19.1 cm)",1997-01-15,Gift of Jo Carole and Ronald S. Lauder,Y,...,Architecture,2.1997,,,38.4,,19.1,,,


### Transform artists_df

In [4]:
# Keep only the artist attributes used further down the notebook
artists_cols = ["Name", "Nationality", "Birth Year", "Death Year"]

# Build the cleaned frame in a single chain so that re-running this cell always
# starts from artists_df and never mutates an intermediate frame in place.
artists_transformed = (
    artists_df[artists_cols]
    # Only "Name" gets a new header; the other three columns keep theirs
    .rename(columns={"Name": "Artist_Name"})
    # Remove rows that repeat across all four selected columns (first one is kept)
    .drop_duplicates()
    # Blank out missing values in the text columns only. The year columns keep
    # NaN so they stay numeric instead of turning into object columns that mix
    # floats with '' (missing values are still written as empty cells by to_csv).
    .fillna({"Artist_Name": "", "Nationality": ""})
)

artists_transformed.head()


Unnamed: 0,Artist_Name,Nationality,Birth Year,Death Year
0,Robert Arneson,American,1930,1992.0
1,Doroteo Arnaiz,Spanish,1936,
2,Bill Arnold,American,1941,
3,Charles Arnoldi,American,1946,
4,Per Arnoldi,Danish,1941,


### Transform artworks_df

In [5]:
# Columns pulled from the raw artworks table
artworks_cols = ["Name", "Title", "Date", "Acquisition Date"]

# Raw header -> output header; "Acquisition Date" is not listed and keeps its name
artworks_renames = {
    "Name": "Artist_Name",
    "Title": "Artwork Title",
    "Date": "Date of Artwork",
}

# Select the columns and rename them in one step, producing a new frame
artworks_transformed = artworks_df.loc[:, artworks_cols].rename(columns=artworks_renames)

artworks_transformed.head()

Unnamed: 0,Artist_Name,Artwork Title,Date of Artwork,Acquisition Date
0,Otto Wagner,"Ferdinandsbrücke Project, Vienna, Austria, Ele...",1896,1996-04-09
1,Christian de Portzamparc,"City of Music, National Superior Conservatory ...",1987,1995-01-17
2,Emil Hoppe,"Villa near Vienna Project, Outside Vienna, Aus...",1903,1997-01-15
3,Bernard Tschumi,"The Manhattan Transcripts Project, New York, N...",1980,1995-01-17
4,Emil Hoppe,"Villa, project, outside Vienna, Austria, Exter...",1903,1997-01-15


## Merge artists_transformed with artworks_transformed DataFrames
### Transform moma_df to keep only contemporary artists (born in 1920 or later)

In [9]:
# Earliest birth year for an artist to be kept in moma_df
MIN_BIRTH_YEAR = 1920

# Merge, clean and filter in one chain. Every step returns a new frame, so no
# column is assigned on a filtered intermediate (which can raise
# SettingWithCopyWarning) and re-running the cell always gives the same result.
moma_df = (
    # Merge artworks_transformed and artists_transformed on artist name.
    # NOTE(review): how='outer' also keeps artists that match no artwork; those
    # rows reach the output with empty artwork columns - confirm this is intended.
    pd.merge(artworks_transformed, artists_transformed, on='Artist_Name', how='outer')
    # Parse Birth Year as a number; anything unparseable becomes NaN
    .assign(**{'Birth Year': lambda df: pd.to_numeric(df['Birth Year'], errors='coerce')})
    # Rows without a usable birth year cannot be classified, so drop them
    .dropna(subset=['Birth Year'])
    # Change the Birth Year column from float to integer
    .astype({'Birth Year': int})
    # Delete rows where the artist was born before MIN_BIRTH_YEAR
    .loc[lambda df: df['Birth Year'] >= MIN_BIRTH_YEAR]
)
moma_df.head(10)


Unnamed: 0,Artist_Name,Artwork Title,Date of Artwork,Acquisition Date,Nationality,Birth Year,Death Year
4,Christian de Portzamparc,"City of Music, National Superior Conservatory ...",1987,1995-01-17,French,1944,
5,Christian de Portzamparc,"LVMH Tower, New York, NY (Study model)",1994-1999,2013-06-03,French,1944,
6,Christian de Portzamparc,"LVMH Tower, New York, NY (Study model)",1994–1999,2013-06-03,French,1944,
7,Christian de Portzamparc,"LVMH Tower, New York, NY (Study model)",1994–1999,2013-06-03,French,1944,
8,Christian de Portzamparc,"LVMH Tower, New York, NY, Study model",1994–1999,2013-06-03,French,1944,
9,Christian de Portzamparc,"LVMH Tower, New York, NY (Study model)",1994–1999,2013-06-03,French,1944,
10,Christian de Portzamparc,"LVMH Tower, New York, NY (Site model)",1994-1999,2013-06-03,French,1944,
11,Christian de Portzamparc,"LVMH Tower, New York, NY (Perspective sketch)",1994-1999,2013-06-03,French,1944,
12,Christian de Portzamparc,"LVMH Tower, New York, NY (Perspective sketch, ...",1994-1999,2013-06-03,French,1944,
13,Christian de Portzamparc,"LVMH Tower, New York, NY (Perspective sketch)",1994-1999,2013-06-03,French,1944,


### Export moma_df to CSV File

In [10]:
# Write the merged table to disk; index=False leaves the row index out of the file
moma_df.to_csv(path_or_buf=r'Resources/moma.csv', index=False)