In [75]:
'''
#Gigacow-tools# - preprocessing
This scripts used for single cow data collection work.
Data Tables: gigacow_filter.csv, lactation_filter.csv, traffic_raw_filter.csv
'''

import pandas as pd
import numpy as np

#access data from local directory
dataDir = "C:/Users/Savior_Hn/Desktop/Gigacow/"
gigacow_cols = ['AnimalNumber', 'FarmName_Pseudo', 'BreedName', 'Age']
lactation_cols = ['AnimalNumber', 'FarmName_Pseudo', 'LactationInfoDate', 'LactationNumber', 'DaysInMilk']
gigacow = pd.read_csv(dataDir + 'gigacow_filter.csv', encoding='utf-8', usecols=gigacow_cols)
lactation = pd.read_csv(dataDir + 'lactation_filter.csv', encoding='utf-8', usecols=lactation_cols)
traffic = pd.read_csv(dataDir + 'traffic_raw_filter.csv', encoding='utf-8', index_col=False)
#check out cows with most milking events
print(traffic.Gigacow_Cow_Id.value_counts().nlargest(10))

2022    9107
1535    8929
1095    8630
2724    8594
3046    8570
2197    8255
3726    8023
5661    7909
5533    7870
2053    7819
Name: Gigacow_Cow_Id, dtype: int64


In [78]:
#select single cow from the traffic table
traffic_single_cow = traffic.loc[traffic['Gigacow_Cow_Id'] == 1535]
traffic_single_cow.sort_values(by='TrafficEventDateTime', inplace=True)
traffic_single_cow.index = range(len(traffic_single_cow))

'''
    Extract Milking Event and its most recent traffic event to calculate T2-T1
    T1: Entry time into the Mjolkfalla
    T2: Entry time into the milking robot
    T2-T1: calculate time difference between T2&T1 (i.e., Time spend in Mjolkfalla/holding area)
'''
# locate mikling event by searching 'kg' keyword in traffic result
# the most recent traffic event to milking event should be pre_milking event
# need to filter out records with gate failure
track_milking = traffic_single_cow.TrafficResult.str.contains('kg', regex=False)
milking_index_list = track_milking.index[track_milking.values == True].tolist()
pre_milking_index_list = [x-1 for x in milking_index_list]
milking_traffic = traffic_single_cow[traffic_single_cow.index.isin(milking_index_list)]
pre_milking_traffic = traffic_single_cow[traffic_single_cow.index.isin(pre_milking_index_list)]

# drop rows that the gate failed to detect cows but have milking result
# previous area in milking_traffic table should only be Mjolkfalla
# previous area in pre_milking_traffic table should not be Mjolkfalla
failed_list_1_milk = milking_traffic.index[milking_traffic['PreviousArea'] == 'Koridor till Sorteringsgrind 2'].tolist()
failed_list_1_pre = [x-1 for x in failed_list_1_milk]
failed_list_2_pre = pre_milking_traffic.index[pre_milking_traffic['PreviousArea'] == 'Mjolkfalla'].tolist()
failed_list_2_milk = [x+1 for x in failed_list_2_pre]
# traffic result in pre_milking_traffic table should contain Mjolkfalla
track_pre_milking = pre_milking_traffic.TrafficResult.str.contains('Mjolkfalla', regex=False)
failed_list_3_pre = track_pre_milking.index[track_pre_milking.values == False].tolist()
failed_list_3_milk = [x+1 for x in failed_list_3_pre]

# remove failed records based on index list
milking_traffic_failed = failed_list_1_milk + failed_list_2_milk + failed_list_3_milk
pre_milking_traffic_failed = failed_list_1_pre + failed_list_2_pre + failed_list_3_pre
milking_traffic.drop(axis=0, index=milking_traffic_failed, inplace=True)
pre_milking_traffic.drop(axis=0, index=pre_milking_traffic_failed, inplace=True)
# concatenate two tables to track the traffic directly
all_milking_traffic = pd.concat([milking_traffic, pre_milking_traffic])
all_milking_traffic.sort_values(by=['TrafficEventDateTime'], inplace=True)
#rename table columns for merging
milking_traffic.rename(columns={"TrafficEventDateTime": "MilkingEventDateTime", "TrafficResult": "MilkProduction", "TimeInArea_totalSeconds": "RoundedSecondsTimeInArea"}, inplace=True)
pre_milking_traffic.rename(columns={"TrafficEventDateTime": "Pre_MilkingEventDateTime", "TimeInArea_totalSeconds": "RoundedSecondsTimeInArea"}, inplace=True)
# unify the index of two tables
milking_traffic.index = range(len(milking_traffic))
pre_milking_traffic.index = range(len(pre_milking_traffic))
# inert "pre_traffic_milking" to milking traffic table
milking_traffic.insert(5, 'Pre_MilkingEventDateTime', pre_milking_traffic['Pre_MilkingEventDateTime'])
# calculate T2-T1
milking_traffic.MilkingEventDateTime = pd.to_datetime(milking_traffic.MilkingEventDateTime)
milking_traffic.Pre_MilkingEventDateTime = pd.to_datetime(milking_traffic.Pre_MilkingEventDateTime)
milking_traffic['timeDelta_Seconds'] = (milking_traffic['MilkingEventDateTime'] - milking_traffic['Pre_MilkingEventDateTime']).dt.total_seconds()

#extract traffic result(milk production)
milking_traffic['MilkProduction'].replace(r"[^0-9.,]+"," ", inplace=True, regex=True)
milking_traffic['MilkProduction'].replace(r"\s*","", inplace=True, regex=True)
milking_traffic['MilkProduction'].replace(r"[,]+",".", inplace=True, regex=True)
milking_traffic['MilkProduction'] = milking_traffic['MilkProduction'].astype('float64')

# merge all the other features into milking_traffic table
milking_traffic['MilkingDate'] = milking_traffic.MilkingEventDateTime.dt.date
milking_traffic.MilkingDate = pd.to_datetime(milking_traffic.MilkingDate)
lactation.LactationInfoDate = pd.to_datetime(lactation.LactationInfoDate)
single_cow_merge = milking_traffic.merge(lactation, how='left', left_on=['FarmName_Pseudo', 'AnimalNumber', 'MilkingDate'], right_on=['FarmName_Pseudo', 'AnimalNumber', 'LactationInfoDate'])
single_cow_merge = single_cow_merge.merge(gigacow, how='left', left_on=['FarmName_Pseudo', 'AnimalNumber'], right_on=['FarmName_Pseudo', 'AnimalNumber'])
# write the collection result to local directory
single_cow_collection = single_cow_merge[['Gigacow_Cow_Id', 'BreedName', 'Age', 'FarmName_Pseudo', 'GroupName', 'TrafficDeviceName', 'MilkProduction', 'timeDelta_Seconds', 'LactationNumber', 'DaysInMilk']]
single_cow_collection.to_csv(dataDir + 'single_cow_collection.csv')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  traffic_single_cow.sort_values(by='TrafficEventDateTime', inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  milking_traffic.drop(axis=0, index=milking_traffic_failed, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pre_milking_traffic.drop(axis=0, index=pre_milking_traffic_failed, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/

In [77]:
single_cow_merge.to_csv(dataDir + 'single_cow_merge.csv')
single_cow_merge

Unnamed: 0,Traffic_Id,FarmName_Pseudo,AnimalNumber,Gigacow_Cow_Id,GroupName,Pre_MilkingEventDateTime,MilkingEventDateTime,PreviousArea,RoundedSecondsTimeInArea,TrafficDeviceName,MilkProduction,MilkingInterval_totalSeconds,timeDelta_Seconds,MilkingDate,LactationInfoDate,LactationNumber,DaysInMilk,BreedName,Age
0,4707940,a624fb9a,1178,1535,VMS 1,2020-01-16 11:57:47,2020-01-16 12:02:31,Mjolkfalla,284.0,VMS2,16.38,31713.0,284.0,2020-01-16,NaT,,,2,7.58
1,3178073,a624fb9a,1178,1535,VMS 1,2020-01-16 18:01:03,2020-01-16 18:03:48,Mjolkfalla,165.0,VMS 1,10.20,21288.0,165.0,2020-01-16,NaT,,,2,7.58
2,3993340,a624fb9a,1178,1535,VMS 1,2020-01-17 00:55:42,2020-01-17 00:58:09,Mjolkfalla,146.0,VMS2,11.18,24592.0,147.0,2020-01-17,NaT,,,2,7.58
3,6949487,a624fb9a,1178,1535,VMS 1,2020-01-17 17:58:06,2020-01-17 17:58:23,Mjolkfalla,16.0,VMS 1,10.11,19874.0,17.0,2020-01-17,NaT,,,2,7.58
4,3695224,a624fb9a,1178,1535,VMS 1,2020-01-18 10:56:20,2020-01-18 11:40:21,Mjolkfalla,2640.0,VMS2,16.00,30786.0,2641.0,2020-01-18,NaT,,,2,7.58
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1652,2135166,a624fb9a,1178,1535,VMS 1,2022-05-07 22:38:43,2022-05-07 22:38:56,Mjolkfalla,0.0,MS1,11.17,34320.0,13.0,2022-05-07,2022-05-07,4.0,566.0,2,7.58
1653,4097823,a624fb9a,1178,1535,VMS 1,2022-05-08 09:03:20,2022-05-08 09:18:44,Mjolkfalla,900.0,MS2,13.90,38040.0,924.0,2022-05-08,2022-05-08,4.0,567.0,2,7.58
1654,6488207,a624fb9a,1178,1535,VMS 1,2022-05-08 18:31:06,2022-05-08 18:48:18,Mjolkfalla,1020.0,MS1,9.81,33780.0,1032.0,2022-05-08,2022-05-08,4.0,567.0,2,7.58
1655,6206053,a624fb9a,1178,1535,VMS 1,2022-05-09 04:33:08,2022-05-09 04:33:22,Mjolkfalla,0.0,MS1,11.63,34800.0,14.0,2022-05-09,2022-05-09,4.0,568.0,2,7.58


In [32]:
#all_milking_traffic.to_csv(dataDir+'all_milking_traffic.csv')
all_milking_traffic

Unnamed: 0,Traffic_Id,FarmName_Pseudo,AnimalNumber,Gigacow_Cow_Id,GroupName,TrafficEventDateTime,PreviousArea,TimeInArea_totalSeconds,TrafficDeviceName,TrafficResult,MilkingInterval_totalSeconds
1,2048858,a624fb9a,602,5661,VMS 1,2020-01-16 06:14:01,Koridor till Sorteringsgrind 2,17.0,Sorteringsgrind 2 Trevägsgrind,Separated Left to Mjolkfalla,0.0
2,1666688,a624fb9a,602,5661,VMS 1,2020-01-16 06:14:19,Mjolkfalla,17.0,VMS 1,"VMSSessionMilkYield 13,77 kg",22304.0
5,3668373,a624fb9a,602,5661,VMS 1,2020-01-16 13:27:27,Koridor till Sorteringsgrind 2,11.0,Sorteringsgrind 2 Trevägsgrind,Separated Left to Mjolkfalla,0.0
6,1468397,a624fb9a,602,5661,VMS 1,2020-01-16 13:28:08,Mjolkfalla,41.0,VMS2,"VMSSessionMilkYield 14,79 kg",25460.0
10,5736687,a624fb9a,602,5661,VMS 1,2020-01-16 23:46:09,Koridor till Sorteringsgrind 2,19.0,Sorteringsgrind 2 Trevägsgrind,Separated Left to Mjolkfalla,0.0
...,...,...,...,...,...,...,...,...,...,...,...
12494,5442279,f454e660,602,6094,Vms kor (144 platser),2022-05-08 03:05:31,Mjolkfalla,1320.0,Vms 1,"Mjölkning 8,22 kg",32640.0
12498,802021,f454e660,602,6094,Vms kor (144 platser),2022-05-08 14:30:40,Foderbord,17700.0,Ingångsgrind,Avskild Vänster till Mjolkfalla,
12499,122168,f454e660,602,6094,Vms kor (144 platser),2022-05-08 14:32:22,Mjolkfalla,60.0,Vms 1,"Mjölkning 9,38 kg",40800.0
12510,4866868,f454e660,602,6094,Vms kor (144 platser),2022-05-09 10:21:05,Foderbord,34980.0,Ingångsgrind,Avskild Vänster till Mjolkfalla,


In [33]:
#pre_milking_traffic.to_csv(dataDir+'pre_milking_traffic.csv')
pre_milking_traffic

Unnamed: 0,Traffic_Id,FarmName_Pseudo,AnimalNumber,Gigacow_Cow_Id,GroupName,Pre_MilkingEventDateTime,PreviousArea,RoundedSecondsTimeInArea,TrafficDeviceName,TrafficResult,MilkingInterval_totalSeconds
0,2048858,a624fb9a,602,5661,VMS 1,2020-01-16 06:14:01,Koridor till Sorteringsgrind 2,17.0,Sorteringsgrind 2 Trevägsgrind,Separated Left to Mjolkfalla,0.0
1,3668373,a624fb9a,602,5661,VMS 1,2020-01-16 13:27:27,Koridor till Sorteringsgrind 2,11.0,Sorteringsgrind 2 Trevägsgrind,Separated Left to Mjolkfalla,0.0
2,5736687,a624fb9a,602,5661,VMS 1,2020-01-16 23:46:09,Koridor till Sorteringsgrind 2,19.0,Sorteringsgrind 2 Trevägsgrind,Separated Left to Mjolkfalla,0.0
3,5205966,a624fb9a,602,5661,VMS 1,2020-01-17 14:57:28,Koridor till Sorteringsgrind 2,20.0,Sorteringsgrind 2 Trevägsgrind,Separated Left to Mjolkfalla,0.0
4,3318670,a624fb9a,602,5661,VMS 1,2020-01-17 23:31:50,Koridor till Sorteringsgrind 2,42.0,Sorteringsgrind 2 Trevägsgrind,Separated Left to Mjolkfalla,0.0
...,...,...,...,...,...,...,...,...,...,...,...
1683,308660,f454e660,602,6094,Vms kor (144 platser),2022-05-07 03:53:12,Foderbord,13620.0,Ingångsgrind,Avskild Vänster till Mjolkfalla,
1684,2925748,f454e660,602,6094,Vms kor (144 platser),2022-05-07 17:04:21,Foderbord,18240.0,Ingångsgrind,Avskild Vänster till Mjolkfalla,
1685,3838870,f454e660,602,6094,Vms kor (144 platser),2022-05-08 02:43:17,Foderbord,6720.0,Ingångsgrind,Avskild Vänster till Mjolkfalla,
1686,802021,f454e660,602,6094,Vms kor (144 platser),2022-05-08 14:30:40,Foderbord,17700.0,Ingångsgrind,Avskild Vänster till Mjolkfalla,


In [42]:
#milking_traffic.to_csv(dataDir+'milking_traffic.csv')
milking_traffic

Unnamed: 0,Traffic_Id,FarmName_Pseudo,AnimalNumber,Gigacow_Cow_Id,GroupName,Pre_MilkingEventDateTime,MilkingEventDateTime,PreviousArea,RoundedSecondsTimeInArea,TrafficDeviceName,MilkProduction,MilkingInterval_totalSeconds,timeDelta_Seconds
0,1666688,a624fb9a,602,5661,VMS 1,2020-01-16 06:14:01,2020-01-16 06:14:19,Mjolkfalla,17.0,VMS 1,13.77,22304.0,18.0
1,1468397,a624fb9a,602,5661,VMS 1,2020-01-16 13:27:27,2020-01-16 13:28:08,Mjolkfalla,41.0,VMS2,14.79,25460.0,41.0
2,1733246,a624fb9a,602,5661,VMS 1,2020-01-16 23:46:09,2020-01-16 23:49:32,Mjolkfalla,203.0,VMS 1,9.86,17743.0,203.0
3,3889208,a624fb9a,602,5661,VMS 1,2020-01-17 14:57:28,2020-01-17 15:10:41,Mjolkfalla,793.0,VMS2,15.58,26373.0,793.0
4,5305439,a624fb9a,602,5661,VMS 1,2020-01-17 23:31:50,2020-01-17 23:37:36,Mjolkfalla,345.0,VMS2,15.64,29774.0,346.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1683,1569739,f454e660,602,6094,Vms kor (144 platser),2022-05-07 03:53:12,2022-05-07 04:07:37,Mjolkfalla,840.0,Vms 1,10.27,40080.0,865.0
1684,1531750,f454e660,602,6094,Vms kor (144 platser),2022-05-07 17:04:21,2022-05-07 17:55:04,Mjolkfalla,3000.0,Vms 1,12.39,49260.0,3043.0
1685,5442279,f454e660,602,6094,Vms kor (144 platser),2022-05-08 02:43:17,2022-05-08 03:05:31,Mjolkfalla,1320.0,Vms 1,8.22,32640.0,1334.0
1686,122168,f454e660,602,6094,Vms kor (144 platser),2022-05-08 14:30:40,2022-05-08 14:32:22,Mjolkfalla,60.0,Vms 1,9.38,40800.0,102.0


In [79]:
single_cow_collection

Unnamed: 0,Gigacow_Cow_Id,BreedName,Age,FarmName_Pseudo,GroupName,TrafficDeviceName,MilkProduction,timeDelta_Seconds,LactationNumber,DaysInMilk
0,1535,2,7.58,a624fb9a,VMS 1,VMS2,16.38,284.0,,
1,1535,2,7.58,a624fb9a,VMS 1,VMS 1,10.20,165.0,,
2,1535,2,7.58,a624fb9a,VMS 1,VMS2,11.18,147.0,,
3,1535,2,7.58,a624fb9a,VMS 1,VMS 1,10.11,17.0,,
4,1535,2,7.58,a624fb9a,VMS 1,VMS2,16.00,2641.0,,
...,...,...,...,...,...,...,...,...,...,...
1652,1535,2,7.58,a624fb9a,VMS 1,MS1,11.17,13.0,4.0,566.0
1653,1535,2,7.58,a624fb9a,VMS 1,MS2,13.90,924.0,4.0,567.0
1654,1535,2,7.58,a624fb9a,VMS 1,MS1,9.81,1032.0,4.0,567.0
1655,1535,2,7.58,a624fb9a,VMS 1,MS1,11.63,14.0,4.0,568.0
