# Working on SF open dataset - Civic Art Collection 

In [11]:
import ijson # Because we are working with JSON data
import pandas as pd
import seaborn as sns # For Data Visualizations 
import matplotlib.pyplot as plt
%matplotlib inline
import re
# File name is rows.json . Working on Web data from https://data.sfgov.org/Culture-and-Recreation/SF-Civic-Art-Collection/zfw6-95su

# Listing out the columns

In [12]:
filename="rows.json"

# opening the file as json file and separating the objects and listing the columns
with open(filename,'r') as f:
    obj=ijson.items(f,'meta.view.columns.item')
    columns=list(obj)

    
# using names in columns
column_names=[col["name"] for col in columns]
column_names
#this gives a lot of garbage values in columns, so we will create a list of good columns

[u'sid',
 u'id',
 u'position',
 u'created_at',
 u'created_meta',
 u'updated_at',
 u'updated_meta',
 u'meta',
 u'_id_',
 u'_rev',
 u'accession_id',
 u'artist',
 u'created_at',
 u'credit_line',
 u'display_dimensions',
 u'geometry',
 u'location_description',
 u'medium',
 u'source',
 u'title',
 u'Location 1']

not every data is relevant, so just making sure the relevant columns are taken


In [13]:
good_columns=['_id_','_rev','accession_id','artist','created_at','credit_line',
             'display_dimensions','geometry','location_description','medium','source','title','Location 1']

# creating a list which will give a list of data
data=[]
with open(file_name,'r') as json_file:
    objects=ijson.items(json_file,'data.item')
    for row in objects:
        selected_row=[]
        for item in good_columns:
            selected_row.append(row[column_names.index(item)])
        data.append(selected_row)    #added every row to the data list

In [14]:
data
# This will give dataset in a list format


[[u'_id',
  u'_rev',
  u'accession_id',
  u'artist',
  1320851809,
  u'credit_line',
  u'display_dimensions',
  u'geometry',
  u'location_description',
  u'medium',
  u'source',
  u'title',
  [None, None, None, None, None]],
 [u'63daabb62278559f8a0643be882613c5',
  u'2-2a0d31161230cdd59c47cbf4459ad806',
  u'1995.18',
  u'Acconci, Saitowitz and Soloman',
  1320851809,
  u'Commissioned by the San Francisco Art Commission for the Waterfront Project.',
  u'2 1/2 miles X 5 feet',
  u'{"type":"Point","coordinates":[-122.4120591,37.8085303]}',
  u'Public Display : District 3 : Embarcadero/Waterfront',
  u'Black concrete, with a strip of glass blocks, to be illuminated at night',
  u'San Francisco Arts Commission',
  u'Promenade Ribbon',
  [None, None, None, None, None]],
 [u'63daabb62278559f8a0643be88261503',
  u'2-8418bc9b09e27aba350721c0de8b98ee',
  u'1986.2',
  u'Acconci, Vito',
  1320851809,
  u'Purchased by the San Francisco Art Commission for the San Francisco International Airport',
  

## Make this list more readable, hence Structure it

This is how we make it to a dataframe and we take only the columns that we need from the good_columns list we made previously


In [37]:
artdata=pd.DataFrame(data,columns=good_columns)
#x=re.split(r':',artdata.geometry[2])

#dropping the first row which is a duplicate of the header
artdata=artdata[artdata.geometry!='geometry']
 


# Let's have one final look at the dataset

In [38]:
artdata

Unnamed: 0,_id_,_rev,accession_id,artist,created_at,credit_line,display_dimensions,geometry,location_description,medium,source,title,Location 1
1,63daabb62278559f8a0643be882613c5,2-2a0d31161230cdd59c47cbf4459ad806,1995.18,"Acconci, Saitowitz and Soloman",1320851809,Commissioned by the San Francisco Art Commissi...,2 1/2 miles X 5 feet,"{""type"":""Point"",""coordinates"":[-122.4120591,37...",Public Display : District 3 : Embarcadero/Wate...,"Black concrete, with a strip of glass blocks, ...",San Francisco Arts Commission,Promenade Ribbon,"[None, None, None, None, None]"
2,63daabb62278559f8a0643be88261503,2-8418bc9b09e27aba350721c0de8b98ee,1986.2,"Acconci, Vito",1320851809,Purchased by the San Francisco Art Commission ...,"26 1/2"" H X 41"" W","{""type"":""Point"",""coordinates"":[-122.389979,37....",International Airport : T3 : Lower Level,Etching on Paper,San Francisco Arts Commission,Two Wings for Wall and Person,"[None, None, None, None, None]"
3,63daabb62278559f8a0643be88261cca,2-673d0a4a69a242adeb9852979da1e295,2002.7,"Acconci, Vito",1320851809,Commission for the San Francisco International...,,"{""type"":""Point"",""coordinates"":[-122.389979,37....","Airport : International Terminal, transfer cor...",mixed medium,San Francisco Arts Commission,Light Beams for the Sky of Transfer Cooridor,"[None, None, None, None, None]"
4,63daabb62278559f8a0643be8826291a,2-7ffc3bec57f459d5e478899e0f129bf0,1977.33,"Adams, Gloria Cozzo",1320851809,Purchased by the San Francisco Art Commission ...,481/2,"{""type"":""Point"",""coordinates"":[-122.389979,37....",Airport : Business and Finance,Acrylic and ink on canvas,San Francisco Arts Commission,Cityscape #33,"[None, None, None, None, None]"
5,63daabb62278559f8a0643be88262f39,2-b62dd5bf7e5f6b7e8a62cd78a216d052,1985.5.3,"Adams, Mark",1320851809,Purchased by the San Francisco Art Commission ...,"84"" H X 144"" W","{""type"":""Point"",""coordinates"":[-122.389979,37....",Airport : Terminal 2 : Meeting Area,Flat weave wool tapestry,San Francisco Arts Commission,Garden Outside the Gate (The Garden Tapestries),"[None, None, None, None, None]"
6,63daabb62278559f8a0643be88263e67,2-0857c73da09a7df8750689d22872110d,1985.5.2,"Adams, Mark",1320851809,Purchased by the San Francisco Art Commission ...,"84"" H X 204"" W","{""type"":""Point"",""coordinates"":[-122.389979,37....",Airport : Terminal 2 : Meeting Area,Flat weave wool tapestry,San Francisco Arts Commission,Garden in San Andreas Valley (The Garden Tapes...,"[None, None, None, None, None]"
7,63daabb62278559f8a0643be882646e1,2-710e5caa71a55fd99577ae8e3a7ccbbf,1985.5.1,"Adams, Mark",1320851809,Purchased by the San Francisco Art Commission ...,"84"" H X 96"" W","{""type"":""Point"",""coordinates"":[-122.389979,37....",Airport : Terminal 2 : Meeting Area,Flat weave wool tapestry,San Francisco Arts Commission,Pond in Golden Gate Park (The Garden Tapestries),"[None, None, None, None, None]"
8,63daabb62278559f8a0643be88264f4a,2-6656bcb2f49f1ca2587677345cacf7b1,1902.1,"Aitken, Robert Ingersoll",1320851809,Funds raised by public subscription. Dedicate...,Figure: 12' H; Shaft: 83' H,"{""type"":""Point"",""coordinates"":[-122.4075,37.78...",Public Display : District 6 : Union Square,Bronze figure on granite base,San Francisco Arts Commission,"The Dewey Monument (Admiral George Dewey, 1837...","[None, None, None, None, None]"
9,63daabb62278559f8a0643be88265047,2-9fa9a901cf1dd3120ea9e85bd3334578,1899.1,"Aitken, Robert Ingersoll",1320851809,Commissioned in 1898 by the architect of the M...,20' H,"{""type"":""Point"",""coordinates"":[-122.4666711,37...",Public Display : District 1 : GGP: Music Conco...,Cast Concrete,San Francisco Arts Commission,Spandrels on Spreckles Temple of Music,"[None, None, None, None, None]"
10,63daabb62278559f8a0643be88265388,2-4324193c418aac1cc41ce2ac804ac039,1904.2,"Aitken, Robert Ingersoll",1320851809,Funds raised by popular subscription,"bronze is 15' H, base is 72 3/4 X 72 3/4 ""","{""type"":""Point"",""coordinates"":[-122.454557,37....","Public Display : District 5 : GGP, Entrance to...","Bronze, granite and marble",San Francisco Arts Commission,William McKinley,"[None, None, None, None, None]"
