MTA Art Catalog

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Socrata (SODA) CSV endpoint for the MTA art catalog.
# Without an explicit $limit the API returns only its default page of 1,000
# rows, which would silently truncate the data if the catalog grows past that.
ROW_LIMIT = 50000
url = "https://data.ny.gov/resource/4y8j-9pkd.csv?$limit=" + str(ROW_LIMIT)

df = pd.read_csv(url)

# Receiving exactly ROW_LIMIT rows means the response was probably cut off.
assert len(df) < ROW_LIMIT, "catalog may be truncated; raise ROW_LIMIT or page with $offset"

df.head()

In [None]:
# Structural summary of the frame (columns, dtypes, non-null counts).
df.info()

# Dataset dimensions.
n_rows, n_cols = df.shape
print("Rows:", n_rows, "Columns:", n_cols)

# Missing-value tally per column; only the first ten columns are shown.
missing_per_column = df.isna().sum()
missing_per_column.head(10)


In [None]:
# Remove columns that the analysis below does not use.
# errors='ignore' keeps this cell safe to re-run: on a second execution the
# columns are already gone and a plain drop() would raise KeyError.
unused_columns = ['line', 'art_description', 'art_image_link']
df = df.drop(columns=unused_columns, errors='ignore')
df.head()

In [None]:
# Parse art_date as a year (format '%Y') and store it back as a datetime column.
parsed_art_date = pd.to_datetime(df['art_date'], format='%Y')
df['art_date'] = parsed_art_date
display(df.head())

In [None]:
# List every distinct art_material string before any standardization.
raw_materials = df['art_material'].unique()
print(raw_materials)

In [None]:
# Raw catalog spelling -> standardized material label.
#
# Spellings that describe the same materials must share ONE label; otherwise
# value_counts() reports them as separate categories and splits their counts.
# In particular:
#   * ceramic tile + glass mosaic is always 'Ceramic and Glass Mosaic'
#     (never the reordered 'Glass Mosaic and Ceramic');
#   * laminated glass + mosaic is always 'Glass and Mosaic'
#     (never 'Mosaic and Glass' / 'Mosaic and Laminated Glass');
#   * glass mosaic + laminated glass is 'Glass Mosaic' regardless of word order;
#   * 'Glass Mosaic and Tile' is singular everywhere.
# NOTE(review): 'Ceramic and Glass Mosaic' and 'Glass and Ceramic Mosaic' are
# still distinct labels -- confirm whether they should be merged as well.
material_mapping = {
    'Ceramic tile': 'Ceramic',
    'Ceramic tiles': 'Ceramic',
    'Ceramic - porcelain tiles': 'Ceramic',
    'Ceramic - Terra cotta\x0bcarved relief': 'Ceramic',
    'Porcelain Enamel, ceramic tile': 'Ceramic',
    'Porcelain enamel, Ceramic tile with text': 'Ceramic',
    'Ceramic tile, Glass mosaic': 'Ceramic and Glass Mosaic',
    'Ceramic mosaic': 'Ceramic Mosaic',
    'Ceramic tile, Bronze': 'Ceramic and Bronze',
    'Ceramic mosaic, Wrought iron': 'Ceramic Mosaic and Wrought Iron',
    'Ceramic relief tile': 'Ceramic',
    'Ceramic Tile, Glass mosaic': 'Ceramic and Glass Mosaic',
    'Ceramic Tiles': 'Ceramic',
    'Ceramic and Glass mosaic': 'Ceramic and Glass Mosaic',
    'Ceramic tile, Wrought iron, Stone': 'Ceramic, Wrought Iron, and Stone',
    'Ceramic Tile, glass mosaic, Stone - absolute black granite': 'Ceramic, Glass Mosaic, and Stone',
    'Glass and ceramic mosaic, handmade ceramic relief tiles, hand-cast glass, bronze and cut granite floor tiles': 'Glass and Ceramic Mosaic',
    'Steel, ceramic tile, granite, fiberglass': 'Steel, Ceramic, Granite, and Fiberglass',
    'Ceramic tile, Bronze medallions': 'Ceramic and Bronze',
    'Terra cotta artifacts and glass mosaic': 'Terracotta and Glass Mosaic',
    'Glazed Ceramic, Lighting Elements': 'Ceramic',
    'Ceramic, glass, and marble mosaic': 'Ceramic, Glass, and Marble Mosaic',
    'Celadon fragments and glass mosaic on retaining walls and station building': 'Ceramic and Glass Mosaic',
    'Glass and ceramic mosaic in waiting room and stair and escalator enclosures, terrazzo floor in ticket office': 'Glass and Ceramic Mosaic',
    'Hand-made ceramic tile, porcelain tile': 'Ceramic',
    'Faceted glass; glass mosaic': 'Glass Mosaic',
    'Glass or ceramic mosaic': 'Glass or Ceramic Mosaic',
    'Laminated Glass and Mosaic': 'Glass and Mosaic',
    'ceramic mosaic': 'Ceramic Mosaic',
    'Glass mosaic and laminated glass': 'Glass Mosaic',
    'Glass mosaic and photo-engraved tiles': 'Glass Mosaic and Tile',
    'Mosaic/Tile, Laminated glass': 'Glass and Mosaic',
    'Ceramic forms and glass mosaic': 'Ceramic and Glass Mosaic',
    'Laminated glass and mosaic': 'Glass and Mosaic',
    'Glass and ceramic mosaic': 'Glass and Ceramic Mosaic',
    'Laminated glass, glass mosaic, and ceramic tile': 'Ceramic and Glass Mosaic',
    'Mosaic and hand-glazed ceramic': 'Mosaic and Ceramic',
    'Mosaic and glass': 'Glass and Mosaic',
    'Glass mosaic and ceramic-printed glass': 'Ceramic and Glass Mosaic',
    'Ceramic mosaic and handmade ceramic tile': 'Ceramic Mosaic and Ceramic',
    'Glass Mosaic and Ceramic Tile': 'Ceramic and Glass Mosaic',
    'Laminated glass, glass mosaic': 'Glass Mosaic',
    'Glass mosaic and glazed tile': 'Glass Mosaic and Tile',
    'Ceramic Mosaic': 'Ceramic Mosaic',
    'glass and ceramic mosaic': 'Glass and Ceramic Mosaic',
    'Ceramic Mosiac & Steel': 'Ceramic Mosaic and Steel',
    'Mosaic & Laminated glass': 'Glass and Mosaic',
    'Mosaic and printed ceramic tile': 'Mosaic and Ceramic',
}


def _material_key(value):
    """Return a lookup key that ignores case and whitespace differences.

    Runs of any whitespace (including the vertical tab present in one raw
    spelling) collapse to a single space, and surrounding whitespace is dropped.
    """
    return ' '.join(value.split()).casefold()


# Normalized spelling -> label. Built from material_mapping so the table above
# stays the single source of truth.
_material_lookup = {}
for raw_spelling, label in material_mapping.items():
    # setdefault returns the label already stored for this normalized key, so a
    # mismatch means two spellings that differ only by case/whitespace disagree.
    stored_label = _material_lookup.setdefault(_material_key(raw_spelling), label)
    assert stored_label == label, (
        "conflicting labels for %r: %r vs %r" % (raw_spelling, stored_label, label)
    )

# The labels themselves are accepted as input too, so a raw value that differs
# from a label only by case/whitespace resolves to that label, and re-running
# this cell on already-standardized data changes nothing.
for label in material_mapping.values():
    _material_lookup.setdefault(_material_key(label), label)


def standardize_material(value):
    """Map one raw art_material value to its standardized label.

    Non-string values (such as NaN for a missing material) and spellings that
    have no entry in the lookup are returned unchanged.
    """
    if not isinstance(value, str):
        return value
    return _material_lookup.get(_material_key(value), value)


df['art_material'] = df['art_material'].map(standardize_material)

print(df['art_material'].unique())

In [None]:
# Print the distinct values of each descriptive text column, one array per column.
for column_name in ('station_name', 'artist', 'art_title'):
    print(df[column_name].unique())

In [None]:
# Distinct values found in the agency column.
agencies = df['agency'].unique()
print(agencies)

In [None]:
# Ten stations with the highest number of catalog entries.
artworks_per_station = df['station_name'].value_counts()
station_counts = artworks_per_station.nlargest(10)

fig, ax = plt.subplots(figsize=(10, 6))
station_counts.plot.bar(ax=ax)
ax.set_title('Top 10 Stations by Number of Artworks')
ax.set_xlabel('Station Name')
ax.set_ylabel('Number of Artworks')
# Slant the station names so long labels do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
fig.tight_layout()
plt.show()

In [None]:
# Number of catalog entries per art_date year, in chronological order.
art_years = df['art_date'].dt.year
yearly_counts = art_years.value_counts().sort_index()

fig, ax = plt.subplots(figsize=(12, 6))
yearly_counts.plot.line(ax=ax)
ax.set_title('Number of Artworks by Year')
ax.set_xlabel('Year')
ax.set_ylabel('Number of Artworks')
ax.grid(True)
plt.show()

In [None]:
# Ten most frequent values of the art_material column.
artworks_per_material = df['art_material'].value_counts()
material_counts = artworks_per_material.nlargest(10)

fig, ax = plt.subplots(figsize=(10, 6))
material_counts.plot.bar(ax=ax)
ax.set_title('Top 10 Art Materials by Number of Artworks')
ax.set_xlabel('Art Material')
ax.set_ylabel('Number of Artworks')
# Slant the material names so long labels stay readable.
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
fig.tight_layout()
plt.show()

I loaded the MTA Art Catalog data from the provided URL into a pandas DataFrame. I identified and dropped columns with missing values that weren't essential for my analysis, such as `line`, `art_description`, and `art_image_link`. I converted the `art_date` column to a datetime format. I also started standardizing the `art_material` column by mapping some variations to more consistent terms, though further standardization could still be done for a more detailed analysis. Finally, I analyzed the dataset to see which stations had the most artworks, how the number of installations varied by year, and which materials were most commonly used.