# AESTHEMOS
For use with data collected from 

## TO-DO:
### fix missing IDS
* names w/ accents
* accents mess up comparisons -> can't ID
* Édouard Vuillard! -> Ãˆdouard Vuillard


### make function for computation and loop through all output files
### decide how to group
### remove all work except functions and loops

In [1]:
import pandas as pd
from numpy import array
import os

In [2]:
# Fixes numerical values in gallery files AND makes unique subject list
unique_subjects = set()
for gallery in os.listdir('data/aesthemos'):
    if gallery.endswith(".csv"):
        g = pd.read_csv("data/aesthemos/" + gallery).replace({'1 - Not at all':1, '7 - Strongly':7})
        g_subs = g['Subject #:'].tolist()
        z = [x.replace(' ', '').lower() for x in g_subs]
        unique_subjects = unique_subjects.union(set(z))
        g.to_csv("data/aesthemos/" + gallery, index=False)
list(unique_subjects)

['pilot01', 'pilot02']

In [3]:
# Setup key for painting IDs
IDsTable = pd.read_csv("paintingIDsKey.csv", encoding='latin1')

In [4]:
def clean_single_gallery(filesource, sub):
    file = pd.read_csv(filesource)
    sub_ser = pd.Series([x.replace(' ', '').lower() for x in file['Subject #:']])
    
    table = file.loc[sub_ser == sub].T.reset_index().drop([0,1]).rename(columns={"index": "Title"})
    if len(table.columns) > 1:
        table = (
            table
            .assign(category=table.Title.apply(lambda x: x.split(" ")[-1]))
            .assign(Name=table.Title.apply(lambda x: " ".join(x.split(" ")[:-1])))
            .rename(columns={ table.columns[-1]: "score"})
            .drop('Title', axis=1) 
         )
        table = table.assign(score=table.score.astype(int))
        t = table.pivot_table(index='Name', columns='category', aggfunc=(lambda x: x))
        t.columns = t.columns.levels[1].rename(None)
        t = t.rename(columns=lambda x: x[1:-1]).reset_index()
        art_split = t.Name.str.split(" by ", expand=True)
        t['Title'] = art_split[0]
        t['Artist'] = art_split[1]
        t = t.drop('Name', axis=1)
        t = pd.merge(t, IDsTable, how='left', on=['Title', 'Artist']) #SOME PAINTINGS DISSAPEAR
        return t
    else:
        print("ERROR: " + sub + " not in " + filesource)
        return;

In [5]:
# CONVERT ALL GALLERY FILES INTO SEPERATE SUBJECT CSVS
for subject in unique_subjects:
    current_sub = pd.DataFrame(
        columns = ["Title", "Artist" "Beautiful", "Calm", "Fascinated", "Funny", "Indifferent", "Moved", "Surprised", "Unsettled"]
    )
    for gallery in os.listdir('data/aesthemos'):
        if gallery.endswith(".csv"):
            fix_curr = clean_single_gallery("data/aesthemos/" + gallery, subject)
            if fix_curr is not None:
                fix_curr['Gallery'] = gallery.split()[1].replace(".csv", "")
                current_sub = current_sub.append(fix_curr, ignore_index = True, sort=True)
    current_sub.to_csv("output/"+ subject + "-groups.csv", index=False)
        

# Group Computation/Column Cleanup

In [6]:
def evaluate_subject_paintings(inputfilesource, outputsource = None):
    file = pd.read_csv(inputfilesource)
    df = file.sort_values("Indifferent") #lots of indifferent scores are the same...how do you want to further seperate
    indiff = df.tail(10)
    indiff["Group"] = "indiff"

    others = df.iloc[:-10]
    # create arousal (Moved + Fascinated + Funny + Surprised)
    arousal = others['Moved'] + others['Fascinated'] + others['Funny'] + others['Surprised']
    others["Arousal"] = arousal
    
    # add high/low column to others
    others = others.sort_values("Arousal")
    ### middle value split HILOW
    """
    low = others.head(int(len(others)/2))
    low["hilo"] = "lo"
    high = others.tail(int(len(others)/2))
    high["hilo"] = "hi"
    """
    ### Mean value split HILOW
    aro_mean = others["Arousal"].mean() + 0.00001 # making non int should eliminate = mean
    low = others[others["Arousal"] < aro_mean]
    low["hilo"] = "lo"
    high = others[others["Arousal"] > aro_mean]
    high["hilo"] = "hi"
    full_aro = pd.concat([low, high]).sort_values("Unsettled")
    
    
    
    # make pos/neg based off of Unsettled from full_aro
    ### middle value split
    """
    neg = full_aro.head(int(len(full_aro)/2))
    neg["PosNeg"] = "Neg"
    pos = full_aro.tail(int(len(full_aro)/2))
    pos["PosNeg"] = "Pos"
    """
    ### Mean value split
    unset_mean = full_aro["Unsettled"].mean() + 0.00001 # making non int should eliminate = mean
    neg = full_aro[full_aro["Unsettled"] < unset_mean]
    neg["PosNeg"] = "Neg"
    pos = full_aro[full_aro["Unsettled"] > unset_mean]
    pos["PosNeg"] = "Pos"
    
    #merge hilow/posneg and make groups col
    final_df = pd.concat([pos, neg])
    final_df["Group"] = final_df["hilo"] + final_df["PosNeg"]
    final_df = final_df.drop(columns=["hilo", "PosNeg"])
    
    # Add indiff and order columns
    final_df = final_df.append(indiff, ignore_index = True)
    cols = ["Title", "Artist", "Gallery", "Unique ID", "Moved", "Fascinated", "Funny", "Surprised", 
            "Indifferent", "Calm", "Unsettled", "Beautiful", "Arousal", "Group"]
    final_df = final_df[cols]
    
    # Setup sort for rows so that it knows order Group should go in
    final_df["Group"] = pd.Categorical(final_df['Group'], ["hiPos", "hiNeg", "loPos", "loNeg", "indiff"])
    final_df = final_df.sort_values(['Group','Gallery'])
    final_df
    
    if outputsource is None:
        final_df.to_csv(filesource, index=False)
    else:
        final_df.to_csv(outputsource, index=False)

In [7]:
for subject in unique_subjects:
    evaluate_subject_paintings("output/"+ subject + "-groups.csv", outputsource = "output/"+ subject + "-groupsGROUPED.csv")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/sta