In [39]:
import pandas as pd
import numpy as np 
import re

In [27]:
pd.set_option('display.max_rows', 500)


In [3]:
buddha = pd.read_csv("buddha.csv")
greek = pd.read_csv("greek.csv")
gandhara = pd.read_csv("gandhara.csv")

In [48]:
# Add types to each df
buddha["type"] = "Buddhist"
greek["type"] = "Greek"
gandhara["type"] = "Gandharan"

In [13]:
print(buddha["Production date"].isna().values.any())
print(greek["Production date"].isna().values.any())
print(gandhara["Production date"].isna().values.any())

False
False
True


In [28]:
buddha["Production date"].value_counts()

3rdC                                                      43
2ndC                                                      32
2ndC-3rdC                                                 31
4thC-7thC                                                 12
6thC                                                      11
4thC-5thC                                                 10
7thC-8thC                                                 10
5thC                                                       9
5thC-6thC                                                  8
6thC-7thC                                                  7
1stC                                                       6
7thC-8thC (circa)                                          6
3rdC-7thC (circa)                                          5
6thC (Whitfield 1985); 7thC-8thC (Ghose 2004)              4
7thC                                                       4
6thC-9thC                                                  2
1stC (circa)            

In [29]:
greek["Production date"].value_counts()

332BC-250BC                                                    48
3rdC BC-2ndC BC                                                44
2ndC BC-1stC BC                                                27
3rdC BC                                                        22
1stC BC-1stC                                                   20
2ndC BC                                                        15
1stC BC                                                        13
300BC (circa)                                                  11
3rdC BC - 2ndC BC                                               9
4thC BC(late)                                                   9
4thC BC                                                         9
350 BC - 300 BC                                                 7
3rdC BC - 1stC BC                                               7
250BC-200BC                                                     6
6thC BC - 2ndC BC                                               5
3rdC BC-1s

In [53]:
gandhara["Production date"].value_counts()
gandhara["Production date"].dropna(inplace=True)

In [40]:
def get_year(date_str):
    arr = [x.strip() for x in date_str.split("-")]
    BC_multiplier = 1
    if "BC" in arr[0]:
        BC_multiplier = -1
    number_groups = re.findall(r'\d+', arr[0])
    if len(number_groups[0]) == 1:
        year = int(number_groups[0]) * 100
    else:
        year = int(number_groups[0])
    return year * BC_multiplier

In [45]:
get_year("50 BC - AD 50")

-50

In [60]:
buddha["date"] = buddha["Production date"].apply(get_year)
greek["date"] = greek["Production date"].apply(get_year)
gandhara["date"] = gandhara["Production date"].apply(get_year)


In [61]:
all = pd.concat([buddha, greek, gandhara])

In [65]:
all["filename"] = all["id"] + ".jpg"

In [66]:
all["filename"]

0      18870717144.jpg
1       1892080111.jpg
2        189011161.jpg
3       1902100217.jpg
4       1880070940.jpg
            ...       
191         188054.jpg
192         188041.jpg
193      191405021.jpg
194        1880227.jpg
195     1925061920.jpg
Name: filename, Length: 993, dtype: object

In [67]:
all_selected = all[['filename', 'id', 'type', 'date']]

In [70]:
measurements = pd.read_csv("measurements.txt", sep="\t")

In [72]:
together = pd.merge(all_selected, measurements, on="filename")

In [73]:
together

Unnamed: 0,filename,id,type,date,imageID,brightness_median,brightness_stdev,saturation_median,saturation_stdev,hue_median,hue_stdev
0,18870717144.jpg,18870717144,Buddhist,-300.0,403,224,80.4079,2,54.1441,28,70.8063
1,1892080111.jpg,1892080111,Buddhist,200.0,445,218,63.1146,11,17.0227,26,4.4934
2,189011161.jpg,189011161,Buddhist,384.0,441,130,55.8103,32,25.6474,191,77.5272
3,1902100217.jpg,1902100217,Buddhist,200.0,506,215,63.9945,4,8.9645,177,91.6795
4,1880070940.jpg,1880070940,Buddhist,300.0,223,139,57.0025,72,25.9706,9,34.9751
...,...,...,...,...,...,...,...,...,...,...,...
988,188054.jpg,188054,Gandharan,200.0,334,170,63.5285,25,27.4960,228,101.8552
989,188041.jpg,188041,Gandharan,200.0,330,203,56.9162,7,40.3854,35,27.8027
990,191405021.jpg,191405021,Gandharan,200.0,609,244,71.3581,0,12.9680,0,34.1007
991,1880227.jpg,1880227,Gandharan,200.0,310,149,57.8868,3,15.1840,42,19.8062


In [81]:
together.drop(index=together[together["date"].max()])

KeyError: 18061811

In [75]:
together.to_csv("imageplot_all/metadata.txt", sep="\t", index=False)

In [77]:
together.max()

filename             greek94.jpg
id                       greek94
type                       Greek
date                 1.80618e+07
imageID                      994
brightness_median            255
brightness_stdev         99.7172
saturation_median            230
saturation_stdev         104.061
hue_median                   244
hue_stdev                118.297
dtype: object

In [78]:
together.min()

filename             12745.jpg
id                       12745
type                  Buddhist
date                     -6000
imageID                      1
brightness_median            0
brightness_stdev        4.8688
saturation_median            0
saturation_stdev        0.7659
hue_median                   0
hue_stdev                1.149
dtype: object