In [1]:
from pathlib import Path
from matplotlib.style import use
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split


import torch
import torch.nn as nn

dataDir = Path.cwd().parent.parent/'Data/processed/targetCows/'
usecols = ['MilkingEventDateTime', 'FarmName_Pseudo', 'TrafficDeviceName', 'MilkProduction', 'timeDelta_Seconds', 'LactationNumber', 'DaysInMilk', 'BreedName', 'Age']
lacNum = 1
lacNum_next = 2
threshold_rate = 0.45

# labeling cow with good/bad learner(0/1)
def labeling_data(lacNum, lacNum_next, threshold_rate, cow_total):
    global learner
    timeCost1 = cow_total.loc[cow_total.LactationNumber == lacNum].timeDelta_Seconds.mean()
    timeCost2 = cow_total.loc[cow_total.LactationNumber == lacNum_next].timeDelta_Seconds.mean()
    timediff = timeCost1-timeCost2
    #print('result: '+str(timeCost1)+'  '+str(timeCost2)+'  '+str(timediff/timeCost1))
    if timediff > 0:
        if timediff/timeCost1 >= threshold_rate:
            learner = 1 # good learner
        else:
            learner = 0 # bad learner
    cow_total['label'] = learner
    return cow_total

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# integrate all the cows data into one dataset
filelist = list(Path(dataDir).glob('*.*'))
for i, file in enumerate(filelist):
    single_cow = pd.read_csv(file, encoding='utf-8', usecols=usecols, index_col='MilkingEventDateTime')
    single_cow = labeling_data(lacNum, lacNum_next, threshold_rate, single_cow)
    if i == 0:
        cow_total = single_cow
    cow_total = pd.concat([cow_total, single_cow])
cow_total.sort_values(by=['MilkingEventDateTime'], inplace=True)

In [4]:
#df_static_features = cow_total[['FarmName_Pseudo']]
categorical_cols = ['TrafficDeviceName', 'LactationNumber', 'BreedName']
numerical_cols = ['Age', 'MilkProduction', 'timeDelta_Seconds', 'DaysInMilk']
output_col = ['label']
for col in categorical_cols:
    cow_total[col] = cow_total[col].astype('category')

In [8]:
# one-hot encode categorical features
ohe = OneHotEncoder(handle_unknown='ignore',sparse=False)
cat = ohe.fit_transform(cow_total[categorical_cols])
categorical_data = torch.tensor(cat, dtype=torch.float)
# normalize numerical features
scaler = MinMaxScaler()
num = scaler.fit_transform(cow_total[numerical_cols])
numerical_data = torch.tensor(num, dtype=torch.float64)
# convert label to tensor
output = torch.tensor(cow_total['label'])
train_data = torch.cat((numerical_data, categorical_data), 1)

# split training set and test set
df_data = pd.DataFrame(train_data) 
df_ouput = pd.DataFrame(output)
df_dataset = pd.concat([df_data, df_ouput], axis=1)
df_dataset.dropna(inplace=True)
df_dataset.to_csv(dataDir.parent/'cow_dataset.csv', index=False)


In [6]:
#cow_total.dropna
cow_total

Unnamed: 0_level_0,FarmName_Pseudo,TrafficDeviceName,MilkProduction,timeDelta_Seconds,LactationNumber,DaysInMilk,BreedName,Age,label
MilkingEventDateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-08-17 05:49:08,a624fb9a,vms2,7.10,621.0,1.0,11.0,1,1.96,0
2020-08-17 08:38:47,a624fb9a,vms2,8.49,4432.0,1.0,10.0,2,2.03,0
2020-08-17 13:03:21,a624fb9a,vms2,7.29,1075.0,1.0,11.0,1,1.96,0
2020-08-17 14:19:46,a624fb9a,vms1,15.90,2186.0,1.0,109.0,1,2.46,0
2020-08-17 19:51:27,a624fb9a,vms1,7.04,1209.0,1.0,11.0,1,1.96,0
...,...,...,...,...,...,...,...,...,...
2022-08-23 20:35:54,a624fb9a,vms2,10.90,11.0,3.0,60.0,1,4.48,0
2022-08-23 21:01:55,a624fb9a,vms1,8.72,12.0,3.0,17.0,1,3.85,1
2022-08-23 22:31:03,a624fb9a,vms2,10.80,16.0,3.0,15.0,1,3.93,1
2022-08-23 22:39:49,a624fb9a,vms2,11.82,420.0,3.0,27.0,2,4.01,1


In [9]:
df_dataset

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,0.1
0,0.019455,0.246357,0.026576,0.026385,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0
1,0.046693,0.294587,0.190462,0.023747,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0
2,0.019455,0.252949,0.046100,0.026385,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0
3,0.214008,0.551700,0.093876,0.284960,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0
4,0.019455,0.244275,0.051862,0.026385,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
23426,1.000000,0.378210,0.000344,0.155673,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0
23427,0.754864,0.302568,0.000387,0.042216,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1
23428,0.785992,0.374740,0.000559,0.036939,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1
23429,0.817121,0.410132,0.017932,0.068602,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1
