In [1]:
import pandas as pd
import numpy as np
import datetime as dt

# Load the raw THS export and give its columns fixed, code-friendly names.
ths = pd.read_csv('THS.csv')
cols = ["ID","Location","Region","Denomination","Date1","Date2","Number","Notes","Bibliography"]
# Positional rename: this raises unless the CSV has exactly as many columns as `cols`.
ths.columns = cols
# Index the rows by ID; the 'ID' column itself also stays in the frame.
ths.index = ths['ID']

# function to convert denominations to the standard nummi notation
def convert_denomination(df):
    """Spell out the letter denomination marks in ``df['Denomination']`` as nummi.

    Returns the frame produced by ``DataFrame.replace``; the input frame is
    not modified. Values that are not one of the known marks, and all other
    columns, are passed through unchanged.
    """
    nummi_by_mark = {
        'A': '1 nummus',
        'B': '2 nummi',
        'D': '4 nummi',
        'E': '5 nummi',
        'H': '8 nummi',
        'I': '10 nummi',
        'IS': '16 nummi',
        'K': '20 nummi',
        'M': '40 nummi',
    }
    # Nested-dict form: the outer key restricts the replacement to that column.
    per_column_mapping = {'Denomination': nummi_by_mark}
    return df.replace(per_column_mapping)

In [2]:
# function to build the coin_finds dataframe
def setting_coin_finds(ths):
    """Build the coin_finds table: one single-find record per row of ``ths``.

    Parameters
    ----------
    ths : pandas.DataFrame
        Source table. The columns 'Location', 'Region', 'Date1', 'Date2',
        'Number', 'Notes' and 'Bibliography' are read, and the index values
        become the numeric part of each 'THS-<id>' identifier.

    Returns
    -------
    pandas.DataFrame
        Indexed like ``ths``. Contains the columns listed in ``cols_finds``
        plus 'place_small' and 'place_large'. 'year_found', 'year_end_found',
        'lat' and 'long' are created but not filled in here.
    """
    cols_finds = ['hoard_id', 'name', 'startDate', 'endDate', 'type_find', 'hoard?', 'excavation?', 'single?', 'num_coins', 'num_known_coins', 'year_found',
        'year_end_found', 'comments', 'bibliography', 'lat', 'long', 'certainty', 'owner', 'created', 'imported']
    coin_finds = pd.DataFrame(index=ths.index, columns=cols_finds)

    coin_finds['hoard_id'] = ['THS-' + str(row_id) for row_id in ths.index]
    coin_finds['name'] = ths['Location'] + ', ' + ths['Region'] + '(' + coin_finds['hoard_id'] + ')'
    coin_finds['place_small'] = ths['Location']
    coin_finds['place_large'] = ths['Region']
    coin_finds['startDate'] = ths['Date1']
    coin_finds['endDate'] = ths['Date2']

    # every record is flagged as a single find
    coin_finds['hoard?'] = 0
    coin_finds['excavation?'] = 0
    coin_finds['single?'] = 1
    coin_finds['type_find'] = 'single find'
    coin_finds['num_coins'] = ths['Number']
    coin_finds['num_known_coins'] = ths['Number']
    coin_finds['certainty'] = 'highest' #meaning certain
    coin_finds['owner'] = 'THS AG'

    # Read the clock once so that 'created' and 'imported' carry the same value;
    # two separate now() calls would differ by a few microseconds.
    import_time = pd.Timestamp.now()
    coin_finds['created'] = import_time
    coin_finds['imported'] = import_time
    coin_finds['comments'] = ths['Notes']
    coin_finds['bibliography'] = ths['Bibliography']

    # sort out certainty
    uncertain = ['Yapi Credi', 'Crete', 'Banat', 'Palestine', 'Moesia Secunda', 'Bucovina', 'Lazica', 'Dobrudja', 'Oltenia', 'Transylvania']
    very_uncertain = ['Mesopotamia', 'Anatolia']

    # downgrade the default 'highest': very_uncertain places become 'lowest', uncertain places 'lower'
    coin_finds.loc[coin_finds['place_small'].isin(very_uncertain), 'certainty'] = 'lowest'
    coin_finds.loc[coin_finds['place_small'].isin(uncertain), 'certainty'] = 'lower'

    return coin_finds

In [3]:
# function to set the coin_groups dataframe
def setting_coin_groups(ths):
    """Build the coin_groups table: exactly one coin group per row of ``ths``.

    Parameters
    ----------
    ths : pandas.DataFrame
        Source table. The columns 'Date1', 'Date2', 'Denomination' and
        'Number' are read, and the index values become the numeric part of
        each 'THS-<id>' identifier.

    Returns
    -------
    pandas.DataFrame
        Indexed like ``ths`` with the columns listed in ``cols``. 'ruler' is
        filled with the literal 'placeholder'.
    """
    cols = ['hoard_id', 'coin_group_id', 'start_year', 'end_year', 'revised_start', 'revised_end', 'ruler',
        'dynasty', 'denomination', 'num_coins', 'mint', 'imported', 'created', 'updated']
    coin_groups = pd.DataFrame(index=ths.index, columns=cols)

    coin_groups['hoard_id'] = ['THS-' + str(row_id) for row_id in ths.index]
    coin_groups['coin_group_id'] = coin_groups['hoard_id'] + '-1' # since these are all single finds
    coin_groups['start_year'] = ths['Date1']
    coin_groups['end_year'] = ths['Date2']
    coin_groups['revised_start'] = coin_groups['start_year']
    coin_groups['revised_end'] = coin_groups['end_year']
    coin_groups['ruler'] = 'placeholder'
    coin_groups['dynasty'] = 'Eastern Roman Empire' # filling in dynasty
    coin_groups['denomination'] = ths['Denomination']
    coin_groups['num_coins'] = ths['Number']
    coin_groups['mint'] = 'Thessaloniki'

    # The three timestamps are meant to be identical because importing is a one-off
    # event, so the clock is read once; separate now() calls would differ slightly.
    import_time = pd.Timestamp.now()
    coin_groups['imported'] = import_time
    coin_groups['created'] = import_time
    coin_groups['updated'] = import_time

    return coin_groups

In [4]:
# this cell was used for getting coordinates from Google; now obsolete since getting coordinates from Andrei's file.
# The code below is wrapped in a string literal, so none of it is executed.
# SECURITY: a literal Google API key used to be embedded in this cell. It is now read from the
# GOOGLE_GEOCODING_API_KEY environment variable; the key that was committed should be treated
# as leaked and revoked.
'''
import os
import requests, json

def get_coordinates(place_name):
    gKey = os.environ['GOOGLE_GEOCODING_API_KEY']
    geoURL = 'https://maps.googleapis.com/maps/api/geocode/json?address=' + place_name + '&key=' + gKey

    r = requests.get(geoURL)
    temp = json.loads(r.text)
    if temp['results'] == []:
        return False
    else:
        lat = temp['results'][0]['geometry']['location']['lat']
        lng = temp['results'][0]['geometry']['location']['lng']
        return([lat, lng])


try:
    coordinates_df = pd.read_csv('coordinates.csv')

except:
    temp = coin_finds['place_small'][:]
    temp_set = set(temp)
    #len(temp_set)
    cols = ['Lat', 'Lng']
    coordinates_df = pd.DataFrame(list(temp_set), columns=['Name'])
    coordinates_df['Lat'] = np.nan
    coordinates_df['Lng'] = np.nan

    coordinates = coordinates_df['Name'].apply(get_coordinates)
    
    lats = []
    lngs = []
    for i in coordinates:
        if i != False:
            lats.append(i[0])
            lngs.append(i[1])
        else:
            lats.append(False)
            lngs.append(False)

    coordinates_df['Lat'] = lats
    coordinates_df['Lng'] = lngs
    coordinates_df = coordinates_df.set_index('Name')

    coordinates_df.to_csv('coordinates.csv')
'''

# Obsolete: would copy Lat/Lng from coordinates_df into coin_finds by matching
# coin_finds.place_small against the coordinates_df index.
# The code is disabled by being wrapped in a string literal. As the last expression of the
# cell, that string is what the notebook echoes as the cell output.
'''
for i in range(len(coordinates_df.index)):
    coin_finds.loc[coin_finds.place_small == coordinates_df.index[i], 'lat'] = coordinates_df['Lat'].iloc[i]
    coin_finds.loc[coin_finds.place_small == coordinates_df.index[i], 'long'] = coordinates_df['Lng'].iloc[i]

coin_finds.head()
'''

"\nfor i in range(len(coordinates_df.index)):\n    coin_finds.loc[coin_finds.place_small == coordinates_df.index[i], 'lat'] = coordinates_df['Lat'].iloc[i]\n    coin_finds.loc[coin_finds.place_small == coordinates_df.index[i], 'long'] = coordinates_df['Lng'].iloc[i]\n\ncoin_finds.head()\n"

In [5]:
import re
# Year pattern shared by get_info_from_bib and get_info_from_db.
# NOTE(review): alternatives are tried left to right, and the first one (four digits) succeeds
# wherever the second (XXXX-YYYY) could, so a match is always exactly four digits: for a range
# only the first year is captured. Left unchanged because downstream code compares against
# four-digit strings; confirm whether ranges were meant to be captured whole.
regex = r'([0-9]{4})|([0-9]{4}-[0-9]{4})'

# gets all the required information from the bibliography
def get_info_from_bib(text_line):
    """Split one line of the bibliography file into its parts.

    The slicing assumes lines shaped like ``<author words> <year> = <reference>``
    (inferred from the offsets used below; confirm against 'ths-bib.txt').

    Parameters
    ----------
    text_line : str
        One stripped line of the bibliography file.

    Returns
    -------
    list
        ``[author, years, long_author, reference]`` where ``author`` is the
        text before the first space (the whole line if there is no space),
        ``years`` is the first four-digit match of ``regex``, ``long_author``
        is the text before the year minus the one separating character, and
        ``reference`` is the text after the first ``" = "`` (empty if the line
        has no ``" ="``).

    Raises
    ------
    ValueError
        If the line contains no four-digit year.
    """
    year_match = re.search(regex, text_line)
    if year_match is None:
        # Name the offending line instead of failing on None.group().
        raise ValueError('No four-digit year found in bibliography line: ' + repr(text_line))
    years = year_match.group()

    # str.find returns -1 when nothing is found; used as a slice bound it would
    # silently drop the last character, so each "not found" case is handled explicitly.
    space_loc = text_line.find(" ")
    author = text_line if space_loc == -1 else text_line[:space_loc]

    # Everything before the year, minus the separating character; clamp at 0 so a
    # line that starts with the year yields an empty string.
    long_author = text_line[:max(year_match.start() - 1, 0)]

    equal_loc = text_line.find(" =")
    reference = '' if equal_loc == -1 else text_line[equal_loc + 3:]

    return [author, years, long_author, reference]

In [6]:
# this gets all the required information from a bibliographic entry in the database
def get_info_from_db(text_line):
    """Extract the author key and year from a database bibliography entry.

    Parameters
    ----------
    text_line : str
        The entry as stored in the database.

    Returns
    -------
    list
        ``[author, years]``. ``author`` is the text before the first comma,
        or the whole entry when it contains no comma. ``years`` is the first
        four-digit match of the module-level ``regex``, or ``''`` when the
        entry contains none.
    """
    comma_loc = text_line.find(",")
    # str.find returns -1 when there is no comma; slicing with -1 would silently
    # drop the last character of the entry.
    author = text_line if comma_loc == -1 else text_line[:comma_loc]

    year_match = re.search(regex, text_line)
    years = year_match.group() if year_match is not None else ''

    return [author, years]

# tested and this works
#test_list = list(coin_finds['bibliography'].head(50))
#for i in test_list:
#    print(get_info_from_db(i))

In [7]:
# reading & fixing bibliographic entries
def get_bibliography():
    """Read 'ths-bib.txt' and return its entries as a DataFrame.

    Every non-blank line is parsed with ``get_info_from_bib``. The result has
    the columns 'Author', 'Year', 'Long_Author' and 'Reference'. Diacritics in
    'Author' are folded to plain letters; the other columns keep the text as
    parsed.
    """
    # The file contains non-ASCII names, so the encoding is stated explicitly instead
    # of relying on the platform default (assumes the file is UTF-8 - TODO confirm).
    with open('ths-bib.txt', encoding='utf-8') as f:
        content = [line.strip() for line in f]

    # Blank lines carry no entry and would make the parser fail, so they are skipped.
    biblio = [get_info_from_bib(line) for line in content if line]

    bibliography = pd.DataFrame(biblio, columns=['Author', 'Year', 'Long_Author', 'Reference'])

    # (accented character, plain replacement), applied as literal replacements.
    ascii_fold = (
        ('ț', 't'),
        ('ţ', 't'),
        ('ä', 'a'),
        ('é', 'e'),
        ('ö', 'o'),
        ('ș', 's'),
        # The original listed the comma-below 'ș' twice. The s-cedilla form is folded
        # as well, on the assumption that it was the intended second entry (it mirrors
        # the t-comma / t-cedilla pair above).
        ('\u015f', 's'),
        ('ă', 'a'),
        ('ç', 'c'),
        ('Ç', 'C'),
        ('î', 'i'),
        ('í', 'i'),
        ('ć', 'c'),
        ('š', 's'),
        ('Š', 'S'),
        ('č', 'c'),
        ('ğ', 'g'),
        ('Ü', 'U'),
        ('ł', 'l'),
        ('ı', 'i'),
    )
    authors = bibliography['Author']
    for accented, plain in ascii_fold:
        authors = authors.str.replace(accented, plain, regex=False)
    bibliography['Author'] = authors
    return bibliography

def get_full_reference(author_name, year, bibliography):
    """Resolve an (author, year) key from the database to a full reference string.

    A chain of hard-coded special cases is checked first, in order; the first
    one that applies wins. Some return a full reference, others only a label
    such as 'Missing reference?: '. If none applies, the work is looked up in
    ``bibliography`` by author and year; author keys containing a space are
    looked up under their first word only.

    Parameters
    ----------
    author_name : str
        Author key, as returned by get_info_from_db.
    year : str
        Year string ('' when the entry has none).
    bibliography : pandas.DataFrame
        Needs the columns 'Author', 'Year' and 'Reference'.

    Returns
    -------
    str or None
        The reference or label. ``None`` when nothing matches, in which case
        the unresolved key is also printed.
    """
    if author_name == 'MINAC 2004': return 'Partial reference: '
    if author_name[0:4] == 'MNIR': return 'Partial reference: '
    if author_name[0:4] == 'Info': return 'Personal reference: '
    
    # missing references:
    if (author_name == 'Custurea') and (year == '1995'): return 'Missing reference?: '
    if (author_name == 'Vertan') and (year == '1997'): return 'Missing reference?: '
    if (author_name[0:5] == 'Waage') and (year == '1952'): return 'Missing reference?: '
    if (author_name[0:6] == 'Zeliha'): return 'Missing reference?: '
    if (author_name[0:6] == 'Cherub'): return 'Missing reference?: '
    if (author_name[0:10] == 'Dekoulakou'): return 'Missing reference?: '
    
    # corrupted references - MUST verify these results, some of their bibliographies are corrupted or uncertain based on the existing list I received:
    if (author_name == 'Poenaru, Ocheseanu, BSNR 86-87, 88, no. 5'): return 'Corrupted reference?: '
    if (author_name == 'Poenaru') and (year == ''): return 'Corrupted reference?: '
    if (author_name[0:8] == 'Nudelman'): return 'A. A. Nudel’man, Topografiia kladov i nakhodok edinichnykh monet, Kishinew.'
    if (author_name[0:8] == 'Mussorov'): return 'A. I. Mussurov & L. V. Nosova, Nakhodki vizantiiskikh monet V-VI vv. nа Nizhnem Dnestre, Stratum+ 6, p. 304-306.'
    if (author_name[0:6] == 'Teodor'): return 'D. G. Teodor, Teritoriul est-carpatic în veacurile V-XI e.n.: contribuții arheologice și istorice la problema formării poporului român, Iaşi.'
    if (author_name[0:6] == 'Velter'): return 'A.-M. Velter, Transilvania în secolele V-XII. Interpretări istorico-politice și economice pe baza descoperirilor monetare din bazinul Carpatic, secolele V-XII, Bucharest.'
    
    if (author_name[0:9] == 'Stoliarik'): return 'E. Stolyarik, Essays on monetary circulation in the North-Western Black Sea region in the Late Roman and Early Byzantine periods: late 3rd century – early 13th century A.D., Odessa.'
    if (author_name[0:7] == 'Somogyi'): return 'P. Somogyi, Byzantinische Fundmünzen der Awarenzeit. Einem Bestandsaufnahme, 1998-2007, Acta Archaeologica Carpathica 42-43, p. 231-298.'
    if (author_name[0:12] == 'Abramishvili') and (year == ''): return 'Nokalakevis arkeologiuri ekspeditsiis mier bolo ts’legshi gamovlenili numizmat’ik’uri masla, in P. Zakaraia (ed.), Nokalakevi-Arkeopolisi: III. Arkeologiuri gatchrebi 1983-1989, Tbilisi, p. 270-272.'
    if (author_name[0:6] == 'Mirnik') and (year == '1998' or year == ''): return 'I. Mirnik & A. Šemrov, Byzantine coins in the Zagreb Archaeological Museum numismatic collection. Anastasius I (A.D. 497-518) - Anastasius II (A.D. 713–715), Vjesnik Arheološkog Muzeja u Zagrebu 30-31, p. 129-258.'
    if (author_name[0:11] == 'Georganteli'): return "E. Georganteli, L’espace rural dans la province de Rhodope; le témoignage de la numismatique, in J. Lefort, C. Morrisson & J.-P. Sodini (eds.), Les villages dans l'Empire byzantin (IVe-XVe siècle), Paris, p. 307-318."

    if (author_name[0:10] == 'Mihailescu'): return 'V. Mihăilescu-Bîrliba & C. Mihai, Descoperiri monetare la Târgu Frumos, jud. Iaşi, Arheologia Moldovei 19, p. 253-259.'
    if (author_name[0:6] == 'Winter'): return 'Die byzantinischen Fundmünzen aus dem österreichischen Bereich der Avaria, in F. Daim (ed.), Die Awaren am Rand der byzantinischen Welt: Studien zu Diplomatie, Handel und Technologietransfer im Frühmittelalter, Innsbruck, 45-66.'
    if (author_name[0:13] == 'Sagalassos IV'): return 'S. Scheers, Coins found in 1994 and 1995, in M. Waelkens & J. Poblome (eds.), Sagalassos IV. Report on the survey and excavation campaigns of 1994 and 1995, Leuven, p. 315-350.'
    if (author_name[0:12] == 'Sagalassos V'): return 'S. Scheers, Coins found in 1996 and 1997, in M. Waelkens & L. Loots (eds.), Sagalassos V. Report on the survey and excavation campaigns of 1996–1997, Leuven, 509-549.'
    if (author_name[0:7] == 'Ireland'): return 'S. Ireland, Greek, Roman and Byzantine coins in the museum at Amasya, London.'
    
    if (author_name[0:7] == 'Goricke'): return 'H. Göricke-Lukić, Justinijanov novac iz Slavonije i Baranje, in N. Cambi & E. Marin (eds.), Radovi XIII. Meðunarodnog kongresa za starokrščansku arheologija. Split-Poreč '
    if (author_name[0:7] == 'Vetters'): return 'S. Karwiese, Ephesos 1980: Liste der Fundmünzen, in H. Vetters (ed.), Ephesos. Vorläufiger Grabungsbericht 1980, Vienna, p. 154-160.'
    if (author_name[0:4] == 'Okcu'): return 'R. Okçu (ed.), The Archaeological Museum of Bursa: coin exhibition catalogue, Bursa.'
    if (author_name[0:4] == 'Bell'): return 'Missing reference: '
    if (author_name[0:7] == 'Tsourti'): return 'E. Tsourti, Antikyra Boiotias. Nomismatiki marturia, in L. Kypraiou (ed.), Thorakion: aphieroma ste mneme tou Paulou Lazaride, Athens, p. 123-128.'
    if (author_name[0:8] == 'Boshkova'): return 'B. Bozhkova, Monetni nakhodki ot arkheologicheski kompleks (IV-VII v.) “Iuzhen Park – Lozenets”, Sofiia, Numizmatika, sfragistika i epigrafica 1, p. 73-86.'
    if (author_name[0:3] == 'Kos'): return 'P. Kos, The monetary circulation in the Southeastern Alpine region ca 300 B.C. - A.D. 1000, Situla 24, p. 1-254.'
    
    # Fisher / Zervos: only the listed years are hard-coded; any other year falls
    # through to the bibliography lookup at the end.
    if (author_name[0:6] == 'Fisher'):
        if (year == '1971'): return 'C. K. Williams II & J. Fisher, Corinth, 1970: Forum area, Hesperia 40, no. 1, p. 1-51.'
        if (year == '1975'): return 'C. K. Williams II & J. Fisher, Corinth, 1974: Forum Southwest, Hesperia 44, no. 1, p. 1-50.'
        if (year == '1976'): return 'C. K. Williams II & J. Fisher, Corinth, 1975: Forum Southwest, Hesperia 45, no. 2, p. 99-162.'
    if (author_name[0:6] == 'Zervos'): 
        if year == '1986': return 'C. K. Williams II & O. H. Zervos, Corinth, 1985: east of the theater, Hesperia 55, no. 2, p. 129-175. '
        # NOTE(review): 1991 returns the same text as 1986 - confirm this is not a copy-paste slip.
        if year == '1991': return 'C. K. Williams II & O. H. Zervos, Corinth, 1985: east of the theater, Hesperia 55, no. 2, p. 129-175. '
    
    
    # the reference below is missing a year in the database
    if (author_name == 'Oberlander') and (year == ''): return 'E. Oberländer-Târnoveanu & E.-M. Constantinescu, Monede romane târzii şi bizantine din colecţia Muzeului Judeţean Buzău, Mousaios 4, p. 311-341.'
    if (author_name == 'Butnariu'): return 'V. M. Butnariu, Răspîndirea monedelor bizantine din secolele VI-VII în teritoriile carpato-dunărene, Buletinul Societății Numismatice Române 131-133, p. 199-235.'
    if (author_name[0:6] == 'Arslan'): return 'E. Arslan (ed.), Repertorio dei ritrovamenti di moneta Altomedievale in Italia (489-1002). http://www.ermannoarslan.it/Repertorio/index.php (last update, 30 August, 2016).'
    
    # this has a discrepancy between year in dataset and year in bibliography
    if (author_name[0:7] == 'Lazarov'): return 'L. Lazarov, Moneti ot 16 numii na Iustinian I, secheni v Tesalonika i otkriti v Bulgariia, Numizmatika 25, no. 1-2, p. 16-22.'

    # No special case applied. Author keys with several words are looked up under
    # their first word only.
    space_loc = author_name.find(" ")
    if space_loc > -1:
        author_name = author_name[:space_loc]

    author_works = bibliography[bibliography['Author'] == author_name]
    author_year = author_works[author_works['Year'] == year]

    # Test for "no match" explicitly rather than via a bare except, so that
    # unrelated failures are not swallowed.
    if author_year.empty:
        print('Error in get_full_reference: ', end=' ')
        print(author_name + "-" + year)
        return None
    return author_year['Reference'].iloc[0]

#get_full_reference('Mihaylov', '2014')

In [8]:
def get_ruler(start, end):
    """Return the ruler whose date window contains the span ``start``..``end``.

    The windows are tried in order and the first one that contains the whole
    span wins. Any span that fits none of them is attributed to 'Heraklios'.
    """
    reign_windows = (
        (518, 527, 'Justin I'),
        (527, 565, 'Justinian I'),
        (565, 578, 'Justin II'),
        (578, 582, 'Tiberius II Constantine'),
        (582, 602, 'Maurice'),
        (602, 608, 'Phokas'),
    )
    for window_start, window_end, ruler_name in reign_windows:
        if start >= window_start and end <= window_end:
            return ruler_name
    return 'Heraklios'

#dealing with coordinates
def simplify(temp_str):
    """Reduce a place name to a plain lookup key.

    Accented characters are replaced by plain letters, then everything from
    the first space onwards is dropped, so only the first word remains.

    Parameters
    ----------
    temp_str : str
        Name to normalise.

    Returns
    -------
    str
        The folded first word of ``temp_str``.
    """
    # (accented character, plain replacement), applied as literal replacements.
    ascii_fold = (
        ('ț', 't'),
        ('ţ', 't'),
        ('ä', 'a'),
        ('é', 'e'),
        ('ö', 'o'),
        ('ü', 'u'),
        ('ë', 'e'),
        ('ș', 's'),
        # The original listed the comma-below 'ș' twice. The s-cedilla form is folded
        # as well, on the assumption that it was the intended second entry (it mirrors
        # the t-comma / t-cedilla pair above).
        ('\u015f', 's'),
        ('ă', 'a'),
        ('â', 'a'),
        ('ç', 'c'),
        ('Ç', 'C'),
        ('î', 'i'),
        ('í', 'i'),
        ('ć', 'c'),
        ('š', 's'),
        ('Š', 'S'),
        ('Ș', 'S'),
        ('č', 'c'),
        ('ğ', 'g'),
        ('Ü', 'U'),
        ('ł', 'l'),
        ('ı', 'i'),
    )
    for accented, plain in ascii_fold:
        temp_str = temp_str.replace(accented, plain)

    # keep only the first word
    space = temp_str.find(' ')
    if space > -1:
        temp_str = temp_str[:space]
    return temp_str

# fixing exception w/ coordinates
def fix_locations(ths):
    """Rename 'Location' values to the spellings used for the coordinate lookup.

    The renames are written into ``ths`` itself, and the same frame is
    returned. Locations not listed are left as they are.
    """
    # (name as found in ths, name to use instead), applied in this order
    renames = (
        ('Smirna', 'Smyrna'),
        ('Bichvinta', 'Bich’vinta'),
        ('Ephesus', 'Efes Ephesus'),
        ('Hammat Gader', 'Hamat Gader'),
        ('Patalenitsa', 'Patelenitsa'),
        ('Halmyris', 'Murighiol (Halmyris)'),
        ('Heracleion', 'Herakleion'),
        ('Melitene', 'Malatya (Melitene)'),
        ('Mt Nebo', 'Mount Nebo'),
        ('Dionysopolis', 'Balchik (Dionyssopolis)'),
        ('Salamis', 'Salamis-Constantia'),
        ('Szegvar', 'Segvar'),
        ('Barbalissus', 'Balis (Barbalissus)'),
        ('Schutzen am Gebirge', 'Shutzen am Gebirge'),
        # the next two map a name onto itself, so they change nothing
        ('Dunareni-Bratca', 'Dunareni-Bratca'),
        ('Sacidava', 'Sacidava'),
        ('Moesia II', 'Moesia Secunda'),
    )
    # Listed in the original as commented-out renames with no replacement filled in:
    # 'SantAgata Feltria', 'Ostrov', 'Yapi Credi', 'Sofia-Lozenets', 'Troesmis-Iglita'
    for old_name, new_name in renames:
        ths.loc[ths['Location'] == old_name, 'Location'] = new_name

    return ths


In [9]:
def get_full_bibliography(coin_finds):
    """Resolve every entry of ``coin_finds['bibliography']`` to a full reference.

    Each entry is split into author and year with ``get_info_from_db`` and then
    resolved with ``get_full_reference`` against the table returned by
    ``get_bibliography``. The number of resolved references is printed as a
    check against the expected 1228. Returns the references as a list, in row
    order.
    """
    bibliography = get_bibliography()
    # generator, so that parsing and resolving stay interleaved entry by entry
    parsed_entries = (get_info_from_db(entry) for entry in list(coin_finds['bibliography']))
    full_references = [
        get_full_reference(parsed[0], parsed[1], bibliography)
        for parsed in parsed_entries
    ]
    print(len(full_references), 'References; should be 1228.')
    return full_references

In [10]:
def get_andrei_coordinates(place, andrei):
    """Look up the coordinates of ``place`` in the ``andrei`` table.

    Parameters
    ----------
    place : str
        Place name; it is normalised with ``simplify`` before the lookup.
    andrei : pandas.DataFrame
        Coordinate table indexed by normalised place name, with the columns
        'lat' and 'lng'.

    Returns
    -------
    list or int
        ``[lat, lng]`` when the place is found. When the lookup fails, a
        message is printed and the integer ``1`` is returned as an error
        marker (callers test for ``int``).
    """
    place = simplify(place)
    try:
        # With a duplicated index label .loc yields a Series per coordinate, not a scalar.
        lat = andrei.loc[place, 'lat']
        lng = andrei.loc[place, 'lng']
    except KeyError:
        # Only a failed lookup is treated as "not found"; anything else propagates.
        print('Error: ' + place + ' not in index.')
        return 1
    return [lat, lng]

def fix_coordinates(ths, andrei):
    """Look up coordinates for every row of ``ths``.

    Parameters
    ----------
    ths : pandas.DataFrame
        Needs a 'Location' column.
    andrei : pandas.DataFrame
        Coordinate table passed through to ``get_andrei_coordinates``.

    Returns
    -------
    tuple of (list, list)
        ``(lat, lng)``, each with exactly one entry per row of ``ths`` and in
        row order. Rows whose location could not be resolved get NaN in both
        lists, and the set of unresolved locations is printed.
    """
    error_list = []
    lat = []
    lng = []
    for location in ths['Location']:
        coords = get_andrei_coordinates(location, andrei)
        if isinstance(coords, int):
            # The int is the lookup's error marker. Record the failure and keep a NaN
            # placeholder so the lists stay aligned with the rows of ths; without it
            # every later row would shift up by one.
            error_list.append(location)
            lat.append(np.nan)
            lng.append(np.nan)
        else:
            lat.append(coords[0])
            lng.append(coords[1])

    set_errors = set(error_list)
    if len(set_errors) == 0:
        print('No errors with locations')
    else:
        print(set_errors)
        print(len(set_errors))
    return lat,lng

In [11]:
#delete this cell after testing:
# Scratch check, disabled by being wrapped in a string literal. If run, it would reload
# THS.csv, rebuild the `andrei` coordinate lookup and call fix_coordinates on the
# ths[1000:1200] slice only.
'''
ths = pd.read_csv('THS.csv')
cols = ["ID","Location","Region","Denomination","Date1","Date2","Number","Notes","Bibliography"]
ths.columns = cols
ths.index = ths['ID']
ths = convert_denomination(ths)
ths = fix_locations(ths)


andrei = pd.read_csv('andreicoordinates.csv')
andrei['Name'] = andrei['Name'].apply(simplify)
andrei = andrei.set_index('Name')


fix_coordinates(ths[1000:1200], andrei)
'''


'\nths = pd.read_csv(\'THS.csv\')\ncols = ["ID","Location","Region","Denomination","Date1","Date2","Number","Notes","Bibliography"]\nths.columns = cols\nths.index = ths[\'ID\']\nths = convert_denomination(ths)\nths = fix_locations(ths)\n\n\nandrei = pd.read_csv(\'andreicoordinates.csv\')\nandrei[\'Name\'] = andrei[\'Name\'].apply(simplify)\nandrei = andrei.set_index(\'Name\')\n\n\nfix_coordinates(ths[1000:1200], andrei)\n'

In [12]:
# main part begins here
# Normalise the source table first: both output tables are derived from the cleaned `ths`.
ths = convert_denomination(ths)
ths = fix_locations(ths)

# Both tables share the index of `ths`.
coin_finds = setting_coin_finds(ths)
coin_groups = setting_coin_groups(ths)

# Prints the number of resolved references as a sanity check.
coin_finds['full_bibliography'] = get_full_bibliography(coin_finds) # establishing bibliography

1228 References; should be 1228.


In [13]:
# Derive the ruler of every coin group from its start and end year.
start_years = list(coin_groups['start_year'])
end_years = list(coin_groups['end_year'])
rulers = [
    get_ruler(first_year, last_year)
    for first_year, last_year in zip(start_years, end_years)
]
coin_groups['ruler'] = rulers

In [14]:
# Build the coordinate lookup: normalise the names the same way place names are
# normalised at lookup time, then index the table by them.
andrei = (
    pd.read_csv('andreicoordinates.csv')
    .assign(Name=lambda frame: frame['Name'].apply(simplify))
    .set_index('Name')
)

# One latitude / longitude per row of ths, written into coin_finds.
lat, lng = fix_coordinates(ths,andrei)
coin_finds['lat'] = lat
coin_finds['long'] = lng

No errors with locations


In [15]:
# Diagnostic: collect the positions in `lng` whose value contains a space, plus every
# position where the membership test itself raises.
# NOTE(review): `' ' in x` raises TypeError when x is a float, so if the longitudes are
# floats every position ends up in `ttt` - confirm that this is the intent.
ttt = []
for i in range(len(lng)):
    try:
        if ' ' in lng[i]: ttt.append(i)
    except:
        ttt.append(i)

# NOTE(review): `ttt` holds list positions, while .loc selects by index label (ID).
# The two only agree when every ID equals its row position - verify.
coin_finds.loc[ttt]['long']

ID
0             NaN
1       27.950000
2       28.760000
3       27.951944
4       27.951944
5       27.951944
6       27.951944
7       27.951944
8       28.583330
9       28.583330
10      27.988644
11      27.988644
12      27.988644
13      28.083333
14      28.083333
15      28.131389
16      28.583330
17      28.583330
18      28.583330
19      28.466670
20      28.466670
21      28.466670
22      28.466670
23      28.466670
24      28.466670
25      28.466670
26      28.466670
27      28.466670
28      28.466670
29      28.466670
          ...    
1198    28.534448
1199    28.534448
1200    32.887700
1201    32.887700
1202    32.887700
1203    20.010000
1204    23.840000
1205    23.840000
1206    22.816667
1207    23.840000
1208    24.286389
1209    22.716667
1210    22.716667
1211    21.342500
1212    20.921667
1213    23.037837
1214    23.037837
1215    23.037837
1216    23.037837
1217    23.037837
1218    23.037837
1219    23.037837
1220    23.037837
1221    23.037837
1222   

In [16]:
# Write both tables to CSV in the working directory; the index is written as the first column.
coin_finds.to_csv('ths_finds.csv')
coin_groups.to_csv('ths_coins.csv')
    

In [17]:
# Coerce the coordinate columns to numeric dtype so they can be compared below.
# With its default settings pd.to_numeric raises if a value cannot be parsed.
andrei['lat'] = pd.to_numeric(andrei['lat'])
andrei['lng'] = pd.to_numeric(andrei['lng'])

In [18]:
# Sanity check: list the lookup entries whose longitude is greater than 40.
# NOTE(review): in the saved output Feltre, Oderzo and Sant'Agata show lng of roughly 44-46
# together with lat of roughly 12, which looks like lat and lng swapped for those rows -
# verify against andreicoordinates.csv.
andrei[andrei['lng'] > 40]
#andrei.loc['Transylvania']

Unnamed: 0_level_0,lng,lat,id
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Bich’vinta,40.3333,43.1666,167.0
Feltre,46.016667,11.9,
Lazica,41.876922,42.013339,
Mesopotamia,42.942788,37.190421,
Nokalakevi,42.1833,42.35,168.0
Oderzo,45.78,12.488056,
Sant'Agata,43.866667,12.216667,


In [19]:
# Per region (place_large): the number of non-null values in each column of coin_finds.
coin_finds.groupby('place_large').count()

Unnamed: 0_level_0,hoard_id,name,startDate,endDate,type_find,hoard?,excavation?,single?,num_coins,num_known_coins,...,comments,bibliography,lat,long,certainty,owner,created,imported,place_small,full_bibliography
place_large,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Achaia,78,78,78,78,78,78,78,78,78,78,...,4,78,78,78,78,78,78,78,78,78
Africa,2,2,2,2,2,2,2,2,2,2,...,0,2,2,2,2,2,2,2,2,2
Arabia,19,19,19,19,19,19,19,19,19,19,...,0,19,19,19,19,19,19,19,19,19
Barbaricum,35,35,35,35,35,35,35,35,35,35,...,0,35,35,35,35,35,35,35,35,35
Barbaricum/Dalmatia,3,3,3,3,3,3,3,3,3,3,...,0,3,3,3,3,3,3,3,3,3
Crete,5,5,5,5,5,5,5,5,5,5,...,0,5,5,5,5,5,5,5,5,5
Cyprus,19,19,19,19,19,19,19,19,19,19,...,1,19,19,19,19,19,19,19,19,19
Dacia,147,147,147,147,147,147,147,147,147,147,...,6,147,147,147,147,147,147,147,147,147
Dalmatia,40,40,40,40,40,40,40,40,40,40,...,4,40,40,40,40,40,40,40,40,40
Egypt,2,2,2,2,2,2,2,2,2,2,...,1,2,2,2,2,2,2,2,2,2
