# Manual Changes

## template mapping files are in the git repository

## original data in _CyVerse Discovery Environment_ 
### data file is: "ODOVIRGCLEAN.csv"

### _lifeStage_ and _ageValue_
- the source column is _lifestage_
- separate out lifeStage (e.g., juvenile, adult) from ageValue and ageUnit
- create new columns "ageValue" and "ageUnit"

### _unused columns_
- LocationCode
- Note

## To Code
### _measurementValue_
- select only the "1st_"-prefixed measurement columns

In [9]:
import uuid

import pandas as pd

In [10]:
# Load the deer VertNet table directly from its CyVerse download link
deer = pd.read_csv(
    filepath_or_buffer="https://de.cyverse.org/dl/d/126821C9-D23A-4B22-9B3F-25F19311066E/ODOVIRGCLEAN.csv"
)

In [11]:
#Rearrange columns so that template columns are first, followed by measurement values

#Specify desired columns: the template (metadata) columns first, then the
#"1st_"-prefixed measurement columns. Columns not listed here are dropped.
cols = ['catalognumber',
        'collectioncode',
        'decimallatitude',
        'decimallongitude',
        'eventdate',
        'institutioncode',
        'lifestage',
        'locality',
        'sex',
        'scientificname',
        '1st_body_mass',
        '1st_ear_length',
        '1st_hind_foot_length',
        '1st_tail_length',
        '1st_total_length']

#Subset dataframe to the listed columns, in the listed order
deer = deer[cols]

In [12]:
#Map the source column names onto the template's terms

#Columns that are not keys of this mapping (e.g. 'sex' and the measurement
#columns) keep their current names.
deer = deer.rename(
    {
        'catalognumber': 'catalogNumber',
        'collectioncode': 'collectionCode',
        'decimallatitude': 'decimalLatitude',
        'decimallongitude': 'decimalLongitude',
        'eventdate': 'verbatimEventDate',
        'institutioncode': 'institutionCode',
        'lifestage': 'verbatimAgeValue',
        'locality': 'verbatimLocality',
        'scientificname': 'scientificName',
    },
    axis='columns',
)


In [13]:
#Map the measurement column names onto the ontology's trait terms

#Each pair is (source column, trait term); note that total length becomes
#'full body length'.
deer = deer.rename(columns=dict([
    ('1st_body_mass', 'body mass'),
    ('1st_ear_length', 'ear length'),
    ('1st_hind_foot_length', 'hind foot length'),
    ('1st_tail_length', 'tail length'),
    ('1st_total_length', 'full body length'),
]))


In [18]:
#Add an individualID column: a per-specimen identifier built by concatenating
#collectionCode, institutionCode and catalogNumber (in that order, no separator).
#A row missing any of the three parts gets a missing individualID.
deer = deer.assign(
    individualID=lambda df: df['collectionCode'] + df['institutionCode'] + df['catalogNumber']
)

Unnamed: 0,catalogNumber,collectionCode,decimalLatitude,decimalLongitude,verbatimEventDate,institutionCode,verbatimAgeValue,verbatimLocality,sex,scientificName,body mass,ear length,hind foot length,tail length,full body length,individualID
0,18192,Mammals,17.765560,-64.881668,8/3/72,TTU,,CALIDONIA ESTATE,male,Odocoileus virginianus,9979.0,102.0,275.0,102.0,813.0,MammalsTTU18192
1,UAZ 24592,Mammals,31.392203,-110.702582,11/20/81,UAZ,,"T23S, R16E, ~1 mi. NW Washington Camp on Duque...",female,Odocoileus virginianus,8500.0,120.0,225.0,147.0,835.0,MammalsUAZUAZ 24592
2,18217,Mammals,18.000000,-64.666672,5/5/72,TTU,,,female,Odocoileus virginianus,9979.0,102.0,279.0,102.0,838.0,MammalsTTU18217
3,32127,Mammal specimens,46.247500,-123.412500,1979,UWBM,,Columbian White-tailed Deer Refuge,male,Odocoileus virginianus,5100.0,90.0,300.0,140.0,850.0,Mammal specimensUWBM32127
4,50139,Mammals,,,2009,OMNH,Juvenile,,female,Odocoileus virginianus,5880.0,100.0,285.0,150.0,858.0,MammalsOMNH50139
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16899,,,,,,,,,,,,,,,,
16900,,,,,,,,,,,,,,,,
16901,,,,,,,,,,,,,,,,
16902,,,,,,,,,,,,,,,,


In [20]:
#Add a basisOfRecord column holding the constant "preservedSpecimen" on every row
deer = deer.assign(**{'basisOfRecord': 'preservedSpecimen'})

Unnamed: 0,catalogNumber,collectionCode,decimalLatitude,decimalLongitude,verbatimEventDate,institutionCode,verbatimAgeValue,verbatimLocality,sex,scientificName,body mass,ear length,hind foot length,tail length,full body length,individualID,basisOfRecord
0,18192,Mammals,17.765560,-64.881668,8/3/72,TTU,,CALIDONIA ESTATE,male,Odocoileus virginianus,9979.0,102.0,275.0,102.0,813.0,MammalsTTU18192,preservedSpecimen
1,UAZ 24592,Mammals,31.392203,-110.702582,11/20/81,UAZ,,"T23S, R16E, ~1 mi. NW Washington Camp on Duque...",female,Odocoileus virginianus,8500.0,120.0,225.0,147.0,835.0,MammalsUAZUAZ 24592,preservedSpecimen
2,18217,Mammals,18.000000,-64.666672,5/5/72,TTU,,,female,Odocoileus virginianus,9979.0,102.0,279.0,102.0,838.0,MammalsTTU18217,preservedSpecimen
3,32127,Mammal specimens,46.247500,-123.412500,1979,UWBM,,Columbian White-tailed Deer Refuge,male,Odocoileus virginianus,5100.0,90.0,300.0,140.0,850.0,Mammal specimensUWBM32127,preservedSpecimen
4,50139,Mammals,,,2009,OMNH,Juvenile,,female,Odocoileus virginianus,5880.0,100.0,285.0,150.0,858.0,MammalsOMNH50139,preservedSpecimen
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16899,,,,,,,,,,,,,,,,,preservedSpecimen
16900,,,,,,,,,,,,,,,,,preservedSpecimen
16901,,,,,,,,,,,,,,,,,preservedSpecimen
16902,,,,,,,,,,,,,,,,,preservedSpecimen


In [21]:
#Add country and yearCollected columns; both are filled with the placeholder
#string 'unknown' on every row
deer = deer.assign(country='unknown', yearCollected='unknown')

In [7]:
#create long version so that each trait has its own row

#Trait (measurement) columns that are unpivoted into rows
trait_cols = ['body mass',
              'ear length',
              'hind foot length',
              'tail length',
              'full body length']

#Every other column is kept as an identifier and repeated on each measurement row.
#Deriving the list from the frame (instead of hard-coding names) keeps it in sync
#with the renames above -- the lifestage column is now 'verbatimAgeValue', not
#'lifeStage' -- and stops added metadata columns such as individualID or
#basisOfRecord from being melted into measurement rows.
id_cols = [col for col in deer.columns if col not in trait_cols]

#creating long version, first specifying keep variables, then naming variable and value
longVers = pd.melt(deer,
                   id_vars=id_cols,
                   value_vars=trait_cols,
                   var_name='measurementType',
                   value_name='measurementValue')


In [None]:
#create materialSampleID which is a UUID for each measurement

#One random UUID (uuid4) string per row of the long table, i.e. per measurement.
#The column is added to longVers (not deer) because longVers is the table that
#holds one row per measurement and is the one written to CSV.
#NOTE: the IDs are regenerated on every run, so they are not stable across
#re-executions of the notebook.
longVers = longVers.assign(
    materialSampleID=[str(uuid.uuid4()) for _ in range(len(longVers))]
)

In [None]:
#Save the long-format table as a CSV file in the current working directory
longVers.to_csv(path_or_buf='Deer_Data_Long.csv')