# GN Vector Creater

이 노트북은 artist_info.csv로부터 artist와 genre, nationality 정보를 읽고 artist에 대한 [genre, nationality] vector를 생성합니다.

### Import

In [30]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import argparse

### Read argument

In [31]:
parser = argparse.ArgumentParser()
parser.add_argument('--data_path', type=str, default="data/")
parser.add_argument('--file_name', type=str, default="artists_vector.csv")
parser.add_argument('--enable_multi_label', type=bool, default=False)
parser.add_argument('--save_vector_as_array', type=bool, default=False)

args = parser.parse_args('')

### Load artist_info.csv

In [32]:
artists_info = pd.DataFrame(pd.read_csv(args.data_path + 'artists_info.csv'))
artists_info.head()

Unnamed: 0,name,years,genre,nationality
0,Amedeo Modigliani,1884 - 1920,Expressionism,Italian
1,Vasiliy Kandinskiy,1866 - 1944,"Expressionism,Abstractionism",Russian
2,Diego Rivera,1886 - 1957,"Social Realism,Muralism",Mexican
3,Claude Monet,1840 - 1926,Impressionism,French
4,Rene Magritte,1898 - 1967,"Surrealism,Impressionism",Belgian


### Split genre, nationality

In [33]:
if(args.enable_multi_label):
    artists_info['genre'] = [g.split(',') for g in artists_info['genre']]
    artists_info['nationality'] = [nt.split(',') for nt in artists_info['nationality']]

### Get unique list

In [34]:
genre_key = []
for g in artists_info['genre']:
    if(args.enable_multi_label):
        for gg in g:
            genre_key.append(gg)
    else:
        genre_key.append(g)
genre_key = list(set(genre_key))
print(genre_key, ' total is ', len(genre_key))

['Northern Renaissance', 'Romanticism', 'Impressionism,Post-Impressionism', 'Cubism', 'Symbolism', 'Symbolism,Expressionism', 'High Renaissance', 'Expressionism,Abstractionism', 'Baroque', 'Impressionism', 'Mannerism', 'Early Renaissance', 'Post-Impressionism', 'Realism', 'Surrealism,Impressionism', 'Primitivism,Surrealism', 'Proto Renaissance', 'Suprematism', 'Pop Art', 'Abstract Expressionism', 'Byzantine Art', 'Surrealism', 'Symbolism,Post-Impressionism', 'Symbolism,Art Nouveau', 'Primitivism', 'Expressionism', 'Expressionism,Abstractionism,Surrealism', 'High Renaissance,Mannerism', 'Neoplasticism', 'Realism,Impressionism', 'Social Realism,Muralism']  total is  31


In [35]:
nationality_key = []
for nt in artists_info['nationality']:
    if(args.enable_multi_label):
        for ntt in nt:
            nationality_key.append(ntt)
    else:
        nationality_key.append(nt)

nationality_key = list(set(nationality_key))
print(nationality_key, ' total is ', len(nationality_key))

['Austrian', 'Belgian', 'Russian', 'Italian', 'Spanish,Greek', 'French,British', 'Mexican', 'Spanish', 'Flemish', 'British', 'French,Jewish,Belarusian', 'German', 'German,Swiss', 'American', 'Dutch', 'French', 'Norwegian']  total is  17


In [36]:
def encoding(key, value):
    if(args.save_vector_as_array):
        encoded_vec = np.zeros(len(key), np.byte)
        if(args.enable_multi_label):
            for v in value:
                encoded_vec[key.index(v)] = 1
        else:
            encoded_vec[key.index(value)] = 1
        return encoded_vec
    else:
        return key.index(value)

### Make DataFrame

In [37]:
df = pd.DataFrame()
df['index'] = range(50)
df['name'] = artists_info['name']
df['genre'] = [encoding(genre_key, ge) for ge in artists_info['genre']]
df['nationality'] = [encoding(nationality_key, nt) for nt in artists_info['nationality']]
df.head()

Unnamed: 0,index,name,genre,nationality
0,0,Amedeo Modigliani,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,1,Vasiliy Kandinskiy,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,2,Diego Rivera,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,3,Claude Monet,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,4,Rene Magritte,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...","[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


### Save CSV

In [38]:
df.to_csv(args.data_path + args.file_name)