# Manual Changes

## template mapping files are in the git repository
## original data in _CyVerse Discovery Environment_ 
### data file is: "Horse data_Helena.csv"

### _decimalLatitude_
- get rid of "S" and make sure it is a negative ("-") value

### _decimalLongitude_
- get rid of "W" and make sure it is a negative ("-") value

### _unused columns_
- specimenType

## To Code:
### _basisOfRecord_
- change from "fossil" to "FossilSpecimen"

### _measurementUnit_
- change "millimeters" to "mm"

In [1]:
import pandas as pd

In [2]:
# Load the horse measurement data into a DataFrame.
#
# The tab-delimited export is the file read here. The comma-delimited copy
# can be loaded instead with:
#     horseData = pd.read_csv("Horse data_Helena.csv")
horseData = pd.read_csv(
    "Horse data_Helena.txt",
    sep='\t',
    engine='python',
)

In [36]:
# Clean the data

# Join specimenType and measurementType (no separator between them) into a
# temporary 'test' column.
specimen_type = horseData['specimenType']
horseData['test'] = specimen_type.str.cat(horseData['measurementType'])

# Standardize basisOfRecord: rows that are exactly 'fossil' become
# 'FossilSpecimen'; every other value is left untouched.
is_fossil = horseData['basisOfRecord'] == 'fossil'
horseData.loc[is_fossil, 'basisOfRecord'] = 'FossilSpecimen'

# Abbreviate measurementUnit: rows that are exactly 'millimeters' become 'mm'.
is_millimeters = horseData['measurementUnit'] == 'millimeters'
horseData.loc[is_millimeters, 'measurementUnit'] = 'mm'

In [37]:
# Keep only the columns needed for the template, in the order listed below.
# Any column of horseData that is not named here is dropped by the subset,
# and a KeyError is raised if a listed column is missing from horseData.
cols = ['institutionCode',
        'collectionCode',
        'specimenID',
        'side',
        'scientificName',
        'decimalLatitude',
        'decimalLongitude',
        'sitename',
        'verbatimLocality',
        'basisOfRecord',
        'test',
        'measurementValue',
        'measurementUnit',
        'lithostratigraphicTerms',
        'formation',
        'member',
        'references']

# Subset (and reorder) the dataframe
horseData = horseData[cols]

In [38]:
# Match the column names to the template terms

# Source column name -> template term.
# rename() ignores keys that match no column by default, so the
# 'specimenType' entry has no effect when that column is not present
# (it is not among the columns kept by the subset step).
template_terms = {
    'specimenID': 'catalogNumber',
    'specimenType': 'skeletalElement',
    'side': 'measurementSide',
    'sitename': 'locality',
    'test': 'measurementType',
}
horseData = horseData.rename(columns=template_terms)

In [40]:
# Write the final dataframe as a tab-delimited file.
# index=False keeps the DataFrame's row index out of the output; without it
# pandas writes the index as an extra, unnamed first column ahead of the
# template columns.
horseData.to_csv('Horse_Data_Mapped.txt', sep='\t', index=False)

# Write the same dataframe as a comma-delimited file
horseData.to_csv('Horse_Data_Mapped.csv', index=False)
