In [103]:
import pandas as pd
import numpy as np
import datetime as dt

ths = pd.read_csv('THS.csv')
cols = ["ID","Location","Region","Denomination","Date1","Date2","Number","Notes","Bibliography"]
ths.columns = cols
ths.index = ths['ID']

# function to convert denominations to the standard nummi notation
def convert_denomination(df):
	di = {'K':'20 nummi', 'IS':'16 nummi', 'M':'40 nummi', 'B':'2 nummi', 'A':'1 nummus', 'H':'8 nummi', 'I':'10 nummi', 'E':'5 nummi', 'D':'4 nummi'}
	df = df.replace({'Denomination':di})
	return df

In [104]:
# function to build the coin_finds dataframe
def setting_coin_finds(ths):
    cols_finds = ['hoard_id', 'name', 'startDate', 'endDate', 'type_find', 'hoard?', 'excavation?', 'single?', 'num_coins', 'num_known_coins', 'year_found',
        'year_end_found', 'comments', 'bibliography', 'lat', 'long', 'certainty', 'owner', 'created', 'imported']
    coin_finds = pd.DataFrame(index=ths.index, columns=cols_finds)
    
    ids = pd.Series(ths.index).apply(str)   
    coin_finds['hoard_id'] = 'THS-' + (ids.values)
    coin_finds['name'] = ths['Location'] + ', ' + ths['Region'] + '(' + coin_finds['hoard_id'] + ')'
    coin_finds['place_small'] = ths['Location']
    coin_finds['place_large'] = ths['Region']
    coin_finds['startDate'] = ths['Date1']
    coin_finds['endDate'] = ths['Date2']
    coin_finds['hoard?'] = 0
    coin_finds['excavation?'] = 0
    coin_finds['single?'] = 1
    coin_finds['type_find'] = 'single find'
    coin_finds['num_coins'] = ths['Number']
    coin_finds['num_known_coins'] = ths['Number']
    coin_finds['owner'] = 'Andrei Gandila'
    coin_finds['created'] = pd.Timestamp.now()
    coin_finds['imported'] = pd.Timestamp.now()
    coin_finds['comments'] = ths['Notes']
    coin_finds['bibliography'] = ths['Bibliography']
    
    return coin_finds

In [105]:
# function to set the coin_groups dataframe
def setting_coin_groups(ths):
    cols = ['hoard_id', 'coin_group_id', 'start_year', 'end_year', 'revised_start', 'revised_end', 'ruler', 'revised_ruler',
        'denomination', 'num_coins', 'mint', 'imported', 'created', 'updated']
    coin_groups = pd.DataFrame(index=ths.index, columns=cols)

    ids = pd.Series(ths.index).apply(str)   
    coin_groups['hoard_id'] = 'THS-' + (ids.values)
    coin_groups['coin_group_id'] = coin_groups['hoard_id'] + '-1' # since these are all single finds
    coin_groups['start_year'] = ths['Date1']
    coin_groups['end_year'] = ths['Date2']
    coin_groups['revised_start'] = coin_groups['start_year']
    coin_groups['revised_end'] = coin_groups['end_year']
    coin_groups['ruler'] = 'placeholder'				# 31.1 need to correct this
    coin_groups['revised_ruler'] = coin_groups['ruler']
    coin_groups['denomination'] = ths['Denomination']
    coin_groups['num_coins'] = ths['Number']
    coin_groups['mint'] = 'Thessaloniki'
    coin_groups['imported'] = pd.Timestamp.now()	# this and the next two lines are identical because importing should be a one-off thing.
    coin_groups['created'] = pd.Timestamp.now()
    coin_groups['updated'] = pd.Timestamp.now()

    return coin_groups

In [106]:
ths = convert_denomination(ths)

coin_finds = setting_coin_finds(ths)
coin_groups = setting_coin_groups(ths)

In [107]:
import requests, json

def get_coordinates(place_name):
    gKey = 'AIzaSyAEhSDZteGTpcXp9dYNUhB1AhHuF9r1kFo'
    geoURL = 'https://maps.googleapis.com/maps/api/geocode/json?address=' + place_name + '&key=' + gKey

    r = requests.get(geoURL)
    temp = json.loads(r.text)
    if temp['results'] == []:
        return False
    else:
        lat = temp['results'][0]['geometry']['location']['lat']
        lng = temp['results'][0]['geometry']['location']['lng']
        return([lat, lng])

In [108]:
try:
    coordinates_df = pd.read_csv('coordinates.csv')

except:
    temp = coin_finds['place_small'][:]
    temp_set = set(temp)
    #len(temp_set)
    cols = ['Lat', 'Lng']
    coordinates_df = pd.DataFrame(list(temp_set), columns=['Name'])
    coordinates_df['Lat'] = np.nan
    coordinates_df['Lng'] = np.nan

    coordinates = coordinates_df['Name'].apply(get_coordinates)
    
    lats = []
    lngs = []
    for i in coordinates:
        if i != False:
            lats.append(i[0])
            lngs.append(i[1])
        else:
            lats.append(False)
            lngs.append(False)

    coordinates_df['Lat'] = lats
    coordinates_df['Lng'] = lngs
    coordinates_df = coordinates_df.set_index('Name')

    coordinates_df.to_csv('coordinates.csv')

In [109]:
# uses df to populate coin_finds
for i in range(len(coordinates_df.index)):
    coin_finds.loc[coin_finds.place_small == coordinates_df.index[i], 'lat'] = coordinates_df['Lat'].iloc[i]
    coin_finds.loc[coin_finds.place_small == coordinates_df.index[i], 'long'] = coordinates_df['Lng'].iloc[i]

coin_finds

Unnamed: 0_level_0,hoard_id,name,startDate,endDate,type_find,hoard?,excavation?,single?,num_coins,num_known_coins,...,comments,bibliography,lat,long,certainty,owner,created,imported,place_small,place_large
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,THS-1,"Adamclisi, Thracia (QE)(THS-1)",572,573,single find,0,0,1,1,1,...,,"Vertan, Custurea, Pontica 14, 1981, 341",,,,Andrei Gandila,2018-02-08 17:19:31.513621,2018-02-08 17:19:31.514365,Adamclisi,Thracia (QE)
2,THS-2,"Enisala, Thracia (QE)(THS-2)",574,575,single find,0,0,1,1,1,...,,"Vertan, Custurea, Pontica 19, 1986, 300, no. 1084",,,,Andrei Gandila,2018-02-08 17:19:31.513621,2018-02-08 17:19:31.514365,Enisala,Thracia (QE)
3,THS-3,"Harsova, Thracia (QE)(THS-3)",566,567,single find,0,0,1,1,1,...,,"Vertan, Custurea, Pontica 19, 1986, 301, no. 1091",,,,Andrei Gandila,2018-02-08 17:19:31.513621,2018-02-08 17:19:31.514365,Harsova,Thracia (QE)
4,THS-4,"Harsova, Thracia (QE)(THS-4)",567,568,single find,0,0,1,1,1,...,,"Vertan, Custurea, Pontica 19, 1986, 301, no. 1092",,,,Andrei Gandila,2018-02-08 17:19:31.513621,2018-02-08 17:19:31.514365,Harsova,Thracia (QE)
5,THS-5,"Harsova, Thracia (QE)(THS-5)",569,570,single find,0,0,1,2,2,...,,"Vertan, Custurea, Pontica 19, 1986, 301, no. 1...",,,,Andrei Gandila,2018-02-08 17:19:31.513621,2018-02-08 17:19:31.514365,Harsova,Thracia (QE)
6,THS-6,"Harsova, Thracia (QE)(THS-6)",574,575,single find,0,0,1,1,1,...,,"Vertan, Custurea, Pontica 19, 1986, 301, no. 1095",,,,Andrei Gandila,2018-02-08 17:19:31.513621,2018-02-08 17:19:31.514365,Harsova,Thracia (QE)
7,THS-7,"Harsova, Thracia (QE)(THS-7)",568,578,single find,0,0,1,2,2,...,,"Vertan, Custurea, Pontica 19, 1986, 301, no. 1...",,,,Andrei Gandila,2018-02-08 17:19:31.513621,2018-02-08 17:19:31.514365,Harsova,Thracia (QE)
8,THS-8,"Mangalia, Thracia (QE)(THS-8)",566,567,single find,0,0,1,1,1,...,,"Vertan, Custurea, Pontica 19, 1986, 301, no. 1104",,,,Andrei Gandila,2018-02-08 17:19:31.513621,2018-02-08 17:19:31.514365,Mangalia,Thracia (QE)
9,THS-9,"Mangalia, Thracia (QE)(THS-9)",569,570,single find,0,0,1,1,1,...,,"Vertan, Custurea, Pontica 19, 1986, 301, no. 1105",,,,Andrei Gandila,2018-02-08 17:19:31.513621,2018-02-08 17:19:31.514365,Mangalia,Thracia (QE)
10,THS-10,"Sacidava, Thracia (QE)(THS-10)",585,586,single find,0,0,1,1,1,...,,"Custurea, Vertan, Talmatchi, Pontica 32, 1999,...",,,,Andrei Gandila,2018-02-08 17:19:31.513621,2018-02-08 17:19:31.514365,Sacidava,Thracia (QE)


In [110]:
coin_groups.head(2)

Unnamed: 0_level_0,hoard_id,coin_group_id,start_year,end_year,revised_start,revised_end,ruler,revised_ruler,denomination,num_coins,mint,imported,created,updated
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,THS-1,THS-1-1,572,573,572,573,placeholder,placeholder,20 nummi,1,Thessaloniki,2018-02-08 17:19:31.523589,2018-02-08 17:19:31.524108,2018-02-08 17:19:31.524575
2,THS-2,THS-2-1,574,575,574,575,placeholder,placeholder,20 nummi,1,Thessaloniki,2018-02-08 17:19:31.523589,2018-02-08 17:19:31.524108,2018-02-08 17:19:31.524575


In [111]:
with open('ths-bib.txt') as f:
    content = f.readlines()
content = [x.strip() for x in content]

In [112]:
import re
regex = r'([0-9]{4})|([0-9]{4}-[0-9]{4})'

# gets all the required information from the bibliography
def get_info_from_bib(text_line):
    space_loc = text_line.find(" ")
    author = text_line[:space_loc]
    equal_loc = text_line.find(" =")
    
    temp = re.search(regex, text_line)
    years = temp.group() # covers the years in XXXX or XXXX-YYYY format
    
    year_start = temp.start()
    long_author = text_line[:int(year_start)-1]
    
    reference = text_line[equal_loc+3:]
    
    return [author, years, long_author, reference]

#tested and this works
#for i in content[:20]:
#    res = get_info_from_bib(i)
#    print(res)

biblio = []
for i in content[:]:
    #print(i)
    biblio.append(get_info_from_bib(i))

bibliography = pd.DataFrame(biblio, columns=['Author', 'Year', 'Long_Author', 'Reference'])
bibliography.tail(3)

Unnamed: 0,Author,Year,Long_Author,Reference
347,Vertan,1995,Vertan & Custurea,"A. Vertan & G. Custurea, Descoperiri monetare ..."
348,Vîlcu,2010,Vîlcu & Nicolae,"A. Vîlcu & E. Nicolae, Monede bizantine descop..."
349,Williams,1991,Williams & Zervos,"C. K. Williams II & O. Zervos, Corinth, 1990: ..."


In [113]:
# this gets all the required information from an entry in the database
def get_info_from_db(text_line):
    comma_loc = text_line.find(",")
    author = text_line[:comma_loc]
    
    temp = re.search(regex, text_line)
    try:
        years = temp.group() # covers the years in XXXX or XXXX-YYYY format
    except:
        #print(text_line)
        years = ''

    return [author, years]

# tested and this works
#test_list = list(coin_finds['bibliography'].head(50))
#for i in test_list:
#    print(get_info_from_db(i))

In [118]:
bibliography['Author'] = bibliography['Author'].str.replace('ț', 't')
bibliography['Author'] = bibliography['Author'].str.replace('ţ', 't')
bibliography['Author'] = bibliography['Author'].str.replace('ä', 'a')
bibliography['Author'] = bibliography['Author'].str.replace('é', 'e')
bibliography['Author'] = bibliography['Author'].str.replace('ö', 'o')
bibliography['Author'] = bibliography['Author'].str.replace('ș', 's')
bibliography['Author'] = bibliography['Author'].str.replace('ș', 's')
bibliography['Author'] = bibliography['Author'].str.replace('ă', 'a')
bibliography['Author'] = bibliography['Author'].str.replace('ç', 'c')
bibliography['Author'] = bibliography['Author'].str.replace('Ç', 'C')
bibliography['Author'] = bibliography['Author'].str.replace('î', 'i')
bibliography['Author'] = bibliography['Author'].str.replace('í', 'i')
bibliography['Author'] = bibliography['Author'].str.replace('ć', 'c')
bibliography['Author'] = bibliography['Author'].str.replace('š', 's')
bibliography['Author'] = bibliography['Author'].str.replace('Š', 'S')
bibliography['Author'] = bibliography['Author'].str.replace('č', 'c')
bibliography['Author'] = bibliography['Author'].str.replace('ğ', 'g')
bibliography['Author'] = bibliography['Author'].str.replace('Ü', 'U')
bibliography['Author'] = bibliography['Author'].str.replace('ł', 'l')
bibliography['Author'] = bibliography['Author'].str.replace('ı', 'i')

def get_full_reference(author_name, year):
    author_works = bibliography[bibliography['Author'] == author_name]
    if author_name == 'MINAC 2004': return 'Partial reference: '
    if author_name[0:4] == 'MNIR': return 'Partial reference: '
    if author_name[0:4] == 'Info': return 'Personal reference: '
    
    author_year = author_works[author_works['Year'] == year]
    # missing references:
    if (author_name == 'Custurea') and (year == '1995'): return 'Missing reference?: '
    if (author_name == 'Vertan') and (year == '1997'): return 'Missing reference?: '
    if (author_name[0:5] == 'Waage') and (year == '1952'): return 'Missing reference?: '
    if (author_name[0:6] == 'Zeliha'): return 'Missing reference?: '
    if (author_name[0:6] == 'Cherub'): return 'Missing reference?: '
    if (author_name[0:10] == 'Dekoulakou'): return 'Missing reference?: '
    
    # corrupted references - MUST verify these results, some of their bibliographies are corrupted or uncertain based on the existing list I received:
    if (author_name == 'Poenaru, Ocheseanu, BSNR 86-87, 88, no. 5'): return 'Corrupted reference?: '
    if (author_name == 'Poenaru') and (year == ''): return 'Corrupted reference?: '
    if (author_name[0:8] == 'Nudelman'): return 'A. A. Nudel’man, Topografiia kladov i nakhodok edinichnykh monet, Kishinew.'
    if (author_name[0:8] == 'Mussorov'): return 'A. I. Mussurov & L. V. Nosova, Nakhodki vizantiiskikh monet V-VI vv. nа Nizhnem Dnestre, Stratum+ 6, p. 304-306.'
    if (author_name[0:6] == 'Teodor'): return 'D. G. Teodor, Teritoriul est-carpatic în veacurile V-XI e.n.: contribuții arheologice și istorice la problema formării poporului român, Iaşi.'
    if (author_name[0:6] == 'Velter'): return 'A.-M. Velter, Transilvania în secolele V-XII. Interpretări istorico-politice și economice pe baza descoperirilor monetare din bazinul Carpatic, secolele V-XII, Bucharest.'
    
    if (author_name[0:9] == 'Stoliarik'): return 'E. Stolyarik, Essays on monetary circulation in the North-Western Black Sea region in the Late Roman and Early Byzantine periods: late 3rd century – early 13th century A.D., Odessa.'
    if (author_name[0:7] == 'Somogyi'): return 'P. Somogyi, Byzantinische Fundmünzen der Awarenzeit. Einem Bestandsaufnahme, 1998-2007, Acta Archaeologica Carpathica 42-43, p. 231-298.'
    if (author_name[0:12] == 'Abramishvili') and (year == ''): return 'Nokalakevis arkeologiuri ekspeditsiis mier bolo ts’legshi gamovlenili numizmat’ik’uri masla, in P. Zakaraia (ed.), Nokalakevi-Arkeopolisi: III. Arkeologiuri gatchrebi 1983-1989, Tbilisi, p. 270-272.'
    if (author_name[0:6] == 'Mirnik') and (year == '1998' or year == ''): return 'I. Mirnik & A. Šemrov, Byzantine coins in the Zagreb Archaeological Museum numismatic collection. Anastasius I (A.D. 497-518) - Anastasius II (A.D. 713–715), Vjesnik Arheološkog Muzeja u Zagrebu 30-31, p. 129-258.'
    if (author_name[0:11] == 'Georganteli'): return "E. Georganteli, L’espace rural dans la province de Rhodope; le témoignage de la numismatique, in J. Lefort, C. Morrisson & J.-P. Sodini (eds.), Les villages dans l'Empire byzantin (IVe-XVe siècle), Paris, p. 307-318."

    if (author_name[0:10] == 'Mihailescu'): return 'V. Mihăilescu-Bîrliba & C. Mihai, Descoperiri monetare la Târgu Frumos, jud. Iaşi, Arheologia Moldovei 19, p. 253-259.'
    if (author_name[0:6] == 'Winter'): return 'Die byzantinischen Fundmünzen aus dem österreichischen Bereich der Avaria, in F. Daim (ed.), Die Awaren am Rand der byzantinischen Welt: Studien zu Diplomatie, Handel und Technologietransfer im Frühmittelalter, Innsbruck, 45-66.'
    if (author_name[0:13] == 'Sagalassos IV'): return 'S. Scheers, Coins found in 1994 and 1995, in M. Waelkens & J. Poblome (eds.), Sagalassos IV. Report on the survey and excavation campaigns of 1994 and 1995, Leuven, p. 315-350.'
    if (author_name[0:12] == 'Sagalassos V'): return 'S. Scheers, Coins found in 1996 and 1997, in M. Waelkens & L. Loots (eds.), Sagalassos V. Report on the survey and excavation campaigns of 1996–1997, Leuven, 509-549.'
    if (author_name[0:7] == 'Ireland'): return 'S. Ireland, Greek, Roman and Byzantine coins in the museum at Amasya, London.'
    
    if (author_name[0:7] == 'Goricke'): return 'H. Göricke-Lukić, Justinijanov novac iz Slavonije i Baranje, in N. Cambi & E. Marin (eds.), Radovi XIII. Meðunarodnog kongresa za starokrščansku arheologija. Split-Poreč '
    if (author_name[0:7] == 'Vetters'): return 'S. Karwiese, Ephesos 1980: Liste der Fundmünzen, in H. Vetters (ed.), Ephesos. Vorläufiger Grabungsbericht 1980, Vienna, p. 154-160.'
    if (author_name[0:4] == 'Okcu'): return 'R. Okçu (ed.), The Archaeological Museum of Bursa: coin exhibition catalogue, Bursa.'
    if (author_name[0:4] == 'Bell'): return 'Missing reference: '
    if (author_name[0:7] == 'Tsourti'): return 'E. Tsourti, Antikyra Boiotias. Nomismatiki marturia, in L. Kypraiou (ed.), Thorakion: aphieroma ste mneme tou Paulou Lazaride, Athens, p. 123-128.'
    if (author_name[0:8] == 'Boshkova'): return 'B. Bozhkova, Monetni nakhodki ot arkheologicheski kompleks (IV-VII v.) “Iuzhen Park – Lozenets”, Sofiia, Numizmatika, sfragistika i epigrafica 1, p. 73-86.'
    if (author_name[0:3] == 'Kos'): return 'P. Kos, The monetary circulation in the Southeastern Alpine region ca 300 B.C. - A.D. 1000, Situla 24, p. 1-254.'
    
    if (author_name[0:6] == 'Fisher'):
        if (year == '1971'): return 'C. K. Williams II & J. Fisher, Corinth, 1970: Forum area, Hesperia 40, no. 1, p. 1-51.'
        if (year == '1975'): return 'C. K. Williams II & J. Fisher, Corinth, 1974: Forum Southwest, Hesperia 44, no. 1, p. 1-50.'
        if (year == '1976'): return 'C. K. Williams II & J. Fisher, Corinth, 1975: Forum Southwest, Hesperia 45, no. 2, p. 99-162.'
    if (author_name[0:6] == 'Zervos'): 
        if year == '1986': return 'C. K. Williams II & O. H. Zervos, Corinth, 1985: east of the theater, Hesperia 55, no. 2, p. 129-175. '
        if year == '1991': return 'C. K. Williams II & O. H. Zervos, Corinth, 1985: east of the theater, Hesperia 55, no. 2, p. 129-175. '
    
    
    # the reference below is missing a year in the database
    if (author_name == 'Oberlander') and (year == ''): return 'E. Oberländer-Târnoveanu & E.-M. Constantinescu, Monede romane târzii şi bizantine din colecţia Muzeului Judeţean Buzău, Mousaios 4, p. 311-341.'
    if (author_name == 'Butnariu'): return 'V. M. Butnariu, Răspîndirea monedelor bizantine din secolele VI-VII în teritoriile carpato-dunărene, Buletinul Societății Numismatice Române 131-133, p. 199-235.'
    if (author_name[0:6] == 'Arslan'): return 'E. Arslan (ed.), Repertorio dei ritrovamenti di moneta Altomedievale in Italia (489-1002). http://www.ermannoarslan.it/Repertorio/index.php (last update, 30 August, 2016).'
    
    # this has a discrepancy between year in dataset and year in bibliography
    if (author_name[0:7] == 'Lazarov'): return 'L. Lazarov, Moneti ot 16 numii na Iustinian I, secheni v Tesalonika i otkriti v Bulgariia, Numizmatika 25, no. 1-2, p. 16-22.'

    space_loc = author_name.find(" ")
    if space_loc > -1:
        #print(author_name, end=", ")
        author_name = author_name[:space_loc]
        author_works = bibliography[bibliography['Author'] == author_name]
        author_year = author_works[author_works['Year'] == year]
        #print(author_name, author_year)
    
    try:
        reference = list(author_year['Reference'])[0]
        return reference
    except:
        print('Error in get_full_reference: ', end=' ')
        print(author_name + "-" + year)

get_full_reference('Mihaylov', '2014')

'S. Mihaylov, Moneti ot Tesalonika s nominal 16 numii, otkriti v teritoriiata na rannovizantiiskata provintsiia Vtora Miziia, in Medieval man and his world. Studies in honor of the 70th anniversary of Prof. Dr. Habil. Kazimir Popkonstantinov, Veliko Turnovo, p. 609-620.'

In [119]:
test_list = list(coin_finds['bibliography'])
full_references = []
for i in test_list:
    author_and_date = get_info_from_db(i)
    ref = get_full_reference(author_and_date[0], author_and_date[1])
    full_references.append(ref)
print(len(full_references))

1228


In [125]:
# a few additions:
coin_finds['full_bibliography'] = full_references
coin_groups['dynasty'] = 'Eastern Roman Empire'

In [130]:
def get_ruler(start, end):
    if start >= 518 and end <= 527: return 'Justin I'
    if start >= 527 and end <= 565: return 'Justinian I'
    if start >= 565 and end <= 578: return 'Justin II'
    if start >= 578 and end <= 582: return 'Tiberius II Constantine'
    if start >= 582 and end <= 602: return 'Maurice'
    if start >= 602 and end <= 608: return 'Phokas'
    return 'Heraklios'
    
coin_groups['start_year'].max()

629