In [7]:
'''
#Gigacow-tools# - preprocessing
This scripts used for single cow data collection work.
Data Tables: gigacow_filter.csv, lactation_filter.csv, traffic_raw_filter.csv
'''

import pandas as pd
import numpy as np

#access data from local directory
dataDir = "C:/Users/Savior_Hn/Desktop/Gigacow/"
gigacow = pd.read_csv(dataDir + 'gigacow_filter.csv', encoding='utf-8')
lactation = pd.read_csv(dataDir + 'lactation_filter.csv', encoding='utf-8')
traffic = pd.read_csv(dataDir + 'traffic_raw_filter.csv', encoding='utf-8')
#check out cows with most milking events
print(traffic.Gigacow_Cow_Id.value_counts().nlargest(10))

2022    9107
1535    8929
1095    8630
2724    8594
3046    8570
2197    8255
3726    8023
5661    7909
5533    7870
2053    7819
Name: Gigacow_Cow_Id, dtype: int64


In [25]:
#select single cow from the traffic table
traffic_single_cow = traffic.loc[traffic['Gigacow_Cow_Id'] == 2022]
traffic_single_cow.sort_values(by='TrafficEventDateTime', inplace=True)
traffic_single_cow.index = range(len(traffic_single_cow))

'''
    Extract Milking Event and its most recent traffic event to calculate T2-T1
    T1: Entry time into the Mjolkfalla
    T2: Entry time into the milking robot
    T2-T1: calculate time difference between T2&T1 (i.e., Time spend in Mjolkfalla/holding area)
'''
# locate mikling event by searching 'kg' keyword in traffic result
# the most recent traffic event to milking event should be pre_milking event
# need to filter out records with gate failure
track_milking = traffic_single_cow.TrafficResult.str.contains('kg', regex=False)
milking_index_list = track_milking.index[track_milking.values == True].tolist()
pre_milking_index_list = [x-1 for x in milking_index_list]
milking_traffic = traffic_single_cow[traffic_single_cow.index.isin(milking_index_list)]
pre_milking_traffic = traffic_single_cow[traffic_single_cow.index.isin(pre_milking_index_list)]

# drop rows that the gate failed to detect cows but have milking result
# previous area in milking_traffic table should only be Mjolkfalla
# previous area in pre_milking_traffic table should not be Mjolkfalla
failed_list_1_milk = milking_traffic.index[milking_traffic['PreviousArea'] == 'Koridor till Sorteringsgrind 2'].tolist()
failed_list_1_pre = [x-1 for x in failed_list_1_milk]
failed_list_2_pre = pre_milking_traffic.index[pre_milking_traffic['PreviousArea'] == 'Mjolkfalla'].tolist()
failed_list_2_milk = [x+1 for x in failed_list_2_pre]
# traffic result in pre_milking_traffic table should contain Mjolkfalla
track_pre_milking = pre_milking_traffic.TrafficResult.str.contains('Mjolkfalla', regex=False)
failed_list_3_pre = track_pre_milking.index[track_pre_milking.values == False].tolist()
failed_list_3_milk = [x+1 for x in failed_list_3_pre]

# remove failed records based on index list
milking_traffic_failed = failed_list_1_milk + failed_list_2_milk + failed_list_3_milk
pre_milking_traffic_failed = failed_list_1_pre + failed_list_2_pre + failed_list_3_pre
milking_traffic.drop(axis=0, index=milking_traffic_failed, inplace=True)
pre_milking_traffic.drop(axis=0, index=pre_milking_traffic_failed, inplace=True)
# concatenate two tables to track the traffic directly
all_milking_traffic = pd.concat([milking_traffic, pre_milking_traffic])
all_milking_traffic.sort_values(by=['TrafficEventDateTime'], inplace=True)
#rename table columns for merging
milking_traffic.rename(columns={"TrafficEventDateTime": "MilkingEventDateTime", "TrafficResult": "MilkProduction", "TimeInArea_totalSeconds": "RoundedSecondsTimeInArea"}, inplace=True)
pre_milking_traffic.rename(columns={"TrafficEventDateTime": "Pre_MilkingEventDateTime", "TimeInArea_totalSeconds": "RoundedSecondsTimeInArea"}, inplace=True)
# unify the index of two tables
milking_traffic.index = range(len(milking_traffic))
pre_milking_traffic.index = range(len(pre_milking_traffic))
# inert "pre_traffic_milking" to milking traffic table
milking_traffic.insert(4, 'Pre_MilkingEventDateTime', pre_milking_traffic['Pre_MilkingEventDateTime'])
# calculate T2-T1
milking_traffic.MilkingEventDateTime = pd.to_datetime(milking_traffic.MilkingEventDateTime)
milking_traffic.Pre_MilkingEventDateTime = pd.to_datetime(milking_traffic.Pre_MilkingEventDateTime)
milking_traffic['timeDelta_Seconds'] = (milking_traffic['MilkingEventDateTime'] - milking_traffic['Pre_MilkingEventDateTime']).dt.total_seconds()
#extract traffic result(milk production)
milking_traffic['MilkProduction'].replace(r"[^0-9.,]+"," ", inplace=True, regex=True)
milking_traffic['MilkProduction'].replace(r"\s*","", inplace=True, regex=True)
milking_traffic['MilkProduction'].replace(r"[,]+",".", inplace=True, regex=True)
milking_traffic['MilkProduction'] = milking_traffic['MilkProduction'].astype('float64')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  traffic_single_cow.sort_values(by='TrafficEventDateTime', inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  milking_traffic.drop(axis=0, index=milking_traffic_failed, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pre_milking_traffic.drop(axis=0, index=pre_milking_traffic_failed, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/

In [None]:
#merge all the other features into one table


In [11]:
#all_milking_traffic.to_csv(dataDir+'all_milking_traffic.csv')
all_milking_traffic

Unnamed: 0,Traffic_Id,FarmName_Pseudo,Gigacow_Cow_Id,GroupName,TrafficEventDateTime,PreviousArea,TimeInArea_totalSeconds,TrafficDeviceName,TrafficResult,MilkingInterval_totalSeconds
0,1671239,a624fb9a,2022,VMS 1,2020-01-22 14:18:30,Koridor till Sorteringsgrind 2,29.0,Sorteringsgrind 2 Trevägsgrind,Separated Left to Mjolkfalla,0.0
1,3285463,a624fb9a,2022,VMS 1,2020-01-22 15:03:13,Mjolkfalla,2683.0,VMS2,"VMSSessionMilkYield 0,29 kg",0.0
43,6070478,a624fb9a,2022,VMS 1,2020-01-23 05:54:12,Koridor till Sorteringsgrind 2,107.0,Sorteringsgrind 2 Trevägsgrind,Separated Left to Mjolkfalla,0.0
44,1002503,a624fb9a,2022,VMS 1,2020-01-23 06:18:07,Mjolkfalla,1434.0,VMS2,"VMSSessionMilkYield 5,04 kg",53788.0
52,2727028,a624fb9a,2022,VMS 1,2020-01-23 18:03:17,Koridor till Sorteringsgrind 2,512.0,Sorteringsgrind 2 Trevägsgrind,Separated Left to Mjolkfalla,0.0
...,...,...,...,...,...,...,...,...,...,...
9093,5113192,a624fb9a,2022,VMS 1,2022-05-09 03:30:17,Mjolkfalla,1260.0,MS1,Milking 9.15 kg,21000.0
9099,736819,a624fb9a,2022,VMS 1,2022-05-09 16:19:46,Koridor till Sorteringsgrind 2,60.0,Sorteringsgrind 2 Trevägsgrind,Separated Left to Mjolkfalla,
9100,5407225,a624fb9a,2022,VMS 1,2022-05-09 16:26:15,Mjolkfalla,360.0,MS1,Milking 13.10 kg,23700.0
9105,6031194,a624fb9a,2022,VMS 1,2022-05-09 22:46:07,Koridor till Sorteringsgrind 2,0.0,Sorteringsgrind 2 Trevägsgrind,Separated Left to Mjolkfalla,


In [11]:
#pre_milking_traffic.to_csv(dataDir+'pre_milking_traffic.csv')
pre_milking_traffic

Unnamed: 0,Traffic_Id,FarmName_Pseudo,Gigacow_Cow_Id,GroupName,Pre_MilkingEventDateTime,PreviousArea,RoundedSecondsTimeInArea,TrafficDeviceName,TrafficResult,MilkingInterval_totalSeconds
0,1671239,a624fb9a,2022,VMS 1,2020-01-22 14:18:30,Koridor till Sorteringsgrind 2,29.0,Sorteringsgrind 2 Trevägsgrind,Separated Left to Mjolkfalla,0.0
43,6070478,a624fb9a,2022,VMS 1,2020-01-23 05:54:12,Koridor till Sorteringsgrind 2,107.0,Sorteringsgrind 2 Trevägsgrind,Separated Left to Mjolkfalla,0.0
52,2727028,a624fb9a,2022,VMS 1,2020-01-23 18:03:17,Koridor till Sorteringsgrind 2,512.0,Sorteringsgrind 2 Trevägsgrind,Separated Left to Mjolkfalla,0.0
58,293150,a624fb9a,2022,VMS 1,2020-01-24 05:38:09,Koridor till Sorteringsgrind 2,45.0,Sorteringsgrind 2 Trevägsgrind,Separated Left to Mjolkfalla,0.0
90,6198260,a624fb9a,2022,VMS 1,2020-01-24 19:14:54,Koridor till Sorteringsgrind 2,158.0,Sorteringsgrind 2 Trevägsgrind,Separated Left to Mjolkfalla,0.0
...,...,...,...,...,...,...,...,...,...,...
9085,3788188,a624fb9a,2022,VMS 1,2022-05-08 14:57:24,Koridor till Sorteringsgrind 2,0.0,Sorteringsgrind 2 Trevägsgrind,Separated Left to Mjolkfalla,
9089,2191261,a624fb9a,2022,VMS 1,2022-05-08 21:33:54,Koridor till Sorteringsgrind 2,0.0,Sorteringsgrind 2 Trevägsgrind,Separated Left to Mjolkfalla,
9092,389514,a624fb9a,2022,VMS 1,2022-05-09 03:08:32,Koridor till Sorteringsgrind 2,0.0,Sorteringsgrind 2 Trevägsgrind,Separated Left to Mjolkfalla,
9099,736819,a624fb9a,2022,VMS 1,2022-05-09 16:19:46,Koridor till Sorteringsgrind 2,60.0,Sorteringsgrind 2 Trevägsgrind,Separated Left to Mjolkfalla,


In [26]:
#milking_traffic.to_csv(dataDir+'milking_traffic.csv')
milking_traffic

Unnamed: 0,Traffic_Id,FarmName_Pseudo,Gigacow_Cow_Id,GroupName,Pre_MilkingEventDateTime,MilkingEventDateTime,PreviousArea,RoundedSecondsTimeInArea,TrafficDeviceName,MilkProduction,MilkingInterval_totalSeconds,timeDelta_Seconds
0,3285463,a624fb9a,2022,VMS 1,2020-01-22 14:18:30,2020-01-22 15:03:13,Mjolkfalla,2683.0,VMS2,0.29,0.0,2683.0
1,1002503,a624fb9a,2022,VMS 1,2020-01-23 05:54:12,2020-01-23 06:18:07,Mjolkfalla,1434.0,VMS2,5.04,53788.0,1435.0
2,3365454,a624fb9a,2022,VMS 1,2020-01-23 18:03:17,2020-01-23 19:35:17,Mjolkfalla,5519.0,VMS 1,6.99,46875.0,5520.0
3,1809504,a624fb9a,2022,VMS 1,2020-01-24 05:38:09,2020-01-24 06:46:51,Mjolkfalla,4122.0,VMS2,5.07,39684.0,4122.0
4,3203506,a624fb9a,2022,VMS 1,2020-01-24 19:14:54,2020-01-24 19:57:34,Mjolkfalla,2560.0,VMS2,6.38,47088.0,2560.0
...,...,...,...,...,...,...,...,...,...,...,...,...
1278,5057939,a624fb9a,2022,VMS 1,2022-05-08 14:57:24,2022-05-08 15:48:52,Mjolkfalla,3060.0,MS2,11.90,27480.0,3088.0
1279,1821897,a624fb9a,2022,VMS 1,2022-05-08 21:33:54,2022-05-08 21:34:08,Mjolkfalla,0.0,MS2,9.05,20340.0,14.0
1280,5113192,a624fb9a,2022,VMS 1,2022-05-09 03:08:32,2022-05-09 03:30:17,Mjolkfalla,1260.0,MS1,9.15,21000.0,1305.0
1281,5407225,a624fb9a,2022,VMS 1,2022-05-09 16:19:46,2022-05-09 16:26:15,Mjolkfalla,360.0,MS1,13.10,23700.0,389.0
