# Add THI data for threshold analysis of conception rate
This notebook assumes that the following notebooks have been run, in the order listed, to generate the basic dataframes for conception rate, milk yield (MY) and weather data:
- BuildingDataset.ipynb
    * Results in "updateDF.csv" which contains the following columns:
        * SE_Number,LactationNumber,Breed,FarmName_Pseudo,AnimalNumber,Del_Cow_Id,
        * BirthDate,Father_SE_Number,Mother_SE_Number,CalvingDate,
        * InseminationDate,PregnancyCheckDate,PregnancyStatus,DryOffDate,
        * CullingDate,ExitReason_PrimaryReasonKok,ExitReason_SecondaryReason1Kok,ExitReason_SecondaryReason2Kok,CullingReason1,CullingReason2,
        * next_calving,next_ins,prev_ins,shift_calf,upper_limit
- FertilityTraits.ipynb
    * Results in "fertilityDF_W.csv" which contains the full dataframe with one observation per insemination and including pregnancy analysis within insemination (i.e. may have more than one observation per insemination)
- HS_fertility_FilteringDataframe.ipynb
    * Results in "fertility_filtered.csv" which contains the following columns with one observation per insemination:
        * SE_Number, Breed, LactationNumber, Parity, InseminationDate, HYS, HeatStress, Milk_Kg, CFI, CLI, FLI, NINS, CR0, CI, GL

In [1]:
import pandas as pd
import numpy as np

# Load conception rate

In [47]:
# Read the filtered fertility table. low_memory=False lets pandas infer each
# column's dtype from the whole file instead of chunk by chunk.
df = pd.read_csv("../Data/fertilityDF_W_MY_filtered.csv", low_memory=False)

# Cow/lactation keys, calving and insemination dates, the neighbouring
# insemination / calving / limit dates, and CR0.
col_keep = [
    "SE_Number", "LactationNumber", "CalvingDate", "InseminationDate",
    "next_ins", "prev_ins", "shift_calf", "upper_limit", "CR0",
]

# Keep a single row per cow, lactation and insemination date.
df_fert = df.loc[:, col_keep].drop_duplicates(
    subset=["SE_Number", "LactationNumber", "InseminationDate"]
)
df_fert

Unnamed: 0,SE_Number,LactationNumber,CalvingDate,InseminationDate,next_ins,prev_ins,shift_calf,upper_limit,CR0
0,SE-064c0cec-1189,7.0,2021-06-24,2021-08-17,,,2022-05-25,2022-05-18,1.0
2,SE-064c0cec-1189,8.0,2022-05-25,2022-07-06,2022-08-16,,,2022-12-20,0.0
3,SE-064c0cec-1189,8.0,2022-05-25,2022-08-16,,2022-07-06,,2022-12-20,0.0
5,SE-30dc5787-1389,5.0,2021-04-26,2021-06-28,,,2022-04-11,2022-03-24,1.0
7,SE-30dc5787-1389,6.0,2022-04-11,2022-05-27,2022-06-18,,,2023-04-25,0.0
...,...,...,...,...,...,...,...,...,...
4590,SE-f454e660-0785,1.0,2023-03-05,2023-07-29,,,,2024-03-29,1.0
4591,SE-f454e660-0790,1.0,2023-05-22,2023-07-16,2023-08-07,,,2024-04-30,0.0
4593,SE-f454e660-0790,1.0,2023-05-22,2023-08-07,2023-08-28,2023-07-16,,2024-04-30,0.0
4595,SE-f454e660-0790,1.0,2023-05-22,2023-08-28,,2023-08-07,,2024-04-30,1.0


In [48]:
# Debug aid: to develop against a single example cow, uncomment the two lines
# below. Keep them commented out for the full run.
# SE_Number = ["SE-064c0cec-1189"]  # ["SE-5c06d92d-3114"]  # ["SE-a756bc39-1143"]
# df_fert = df_fert[df_fert["SE_Number"].isin(SE_Number)]

df_fert

Unnamed: 0,SE_Number,LactationNumber,CalvingDate,InseminationDate,next_ins,prev_ins,shift_calf,upper_limit,CR0
0,SE-064c0cec-1189,7.0,2021-06-24,2021-08-17,,,2022-05-25,2022-05-18,1.0
2,SE-064c0cec-1189,8.0,2022-05-25,2022-07-06,2022-08-16,,,2022-12-20,0.0
3,SE-064c0cec-1189,8.0,2022-05-25,2022-08-16,,2022-07-06,,2022-12-20,0.0
5,SE-30dc5787-1389,5.0,2021-04-26,2021-06-28,,,2022-04-11,2022-03-24,1.0
7,SE-30dc5787-1389,6.0,2022-04-11,2022-05-27,2022-06-18,,,2023-04-25,0.0
...,...,...,...,...,...,...,...,...,...
4590,SE-f454e660-0785,1.0,2023-03-05,2023-07-29,,,,2024-03-29,1.0
4591,SE-f454e660-0790,1.0,2023-05-22,2023-07-16,2023-08-07,,,2024-04-30,0.0
4593,SE-f454e660-0790,1.0,2023-05-22,2023-08-07,2023-08-28,2023-07-16,,2024-04-30,0.0
4595,SE-f454e660-0790,1.0,2023-05-22,2023-08-28,,2023-08-07,,2024-04-30,1.0


In [49]:
# Define the time window belonging to each insemination.
# The window opens on the insemination date and closes on the first value that
# is present among: next_ins, shift_calf, upper_limit (in that order).
col_keep = ["SE_Number", "LactationNumber", "CalvingDate", "InseminationDate", "StartIns", "StopIns", "CR0"]
df_fert = df_fert.assign(
    StartIns=lambda d: d["InseminationDate"],
    StopIns=lambda d: d["next_ins"].fillna(d["shift_calf"]).fillna(d["upper_limit"]),
)[col_keep]

# Scratch export for manual inspection.
df_fert.to_csv("test.csv", index=False)

# Load MY and weather data

In [50]:
# Read the merged milk-yield / weather table. low_memory=False lets pandas
# infer each column's dtype from the whole file instead of chunk by chunk.
df_THI = pd.read_csv(
    "../Data/MergedData/MY_weather.csv",
    low_memory=False,
)

In [51]:
# Debug aid: to match the weather data to a single example cow, uncomment the
# two lines below. Keep them commented out for the full run.
# SE_Number = ["SE-064c0cec-1189"]  # ["SE-5c06d92d-3114"]  # ["SE-a756bc39-1143"]
# df_THI = df_THI[df_THI["SE_Number"].isin(SE_Number)]

# Only the cow/lactation keys and the record date are needed at this stage
# (left out for now: "StartTime", "THI_adj", "MeanTHI_adj").
col_keep = ["SE_Number", "LactationNumber", "StartDate"]

# One row per cow, lactation and date.
df_THI = df_THI[col_keep].drop_duplicates(subset=col_keep)
df_THI


Unnamed: 0,SE_Number,LactationNumber,StartDate
0,SE-27c3257a-1492,1.0,2022-10-25
1,SE-27c3257a-1492,1.0,2022-10-26
2,SE-27c3257a-1492,1.0,2022-10-27
3,SE-27c3257a-1492,1.0,2022-10-28
4,SE-27c3257a-1492,1.0,2022-10-29
...,...,...,...
1668993,SE-f454e660-0829,1.0,2024-08-14
1668997,SE-f454e660-0829,1.0,2024-08-15
1669000,SE-f454e660-0829,1.0,2024-08-16
1669003,SE-f454e660-0829,1.0,2024-08-17


Make helper columns to sort insemination data into DF

In [52]:
# Parse the record dates so they are ordered and compared as dates, not text
df_THI = df_THI.assign(StartDate=pd.to_datetime(df_THI["StartDate"]))

# Earliest and latest record date of every cow-lactation in MY_weather.csv
result = df_THI.groupby(["SE_Number", "LactationNumber"], as_index=False).agg(
    FirstDateMY=("StartDate", "min"),
    LastDateMY=("StartDate", "max"),
)

result

Unnamed: 0,SE_Number,LactationNumber,FirstDateMY,LastDateMY
0,SE-064c0cec-1189,7.0,2022-01-01,2022-05-24
1,SE-064c0cec-1189,8.0,2022-05-25,2022-12-20
2,SE-27c3257a-1492,1.0,2022-10-25,2023-11-19
3,SE-27c3257a-1492,2.0,2023-11-19,2024-08-18
4,SE-30dc5787-1389,5.0,2022-01-01,2022-04-10
...,...,...,...,...
2550,SE-f454e660-0798,1.0,2023-08-23,2024-08-18
2551,SE-f454e660-0800,1.0,2023-09-24,2024-08-18
2552,SE-f454e660-0803,1.0,2023-08-27,2024-08-18
2553,SE-f454e660-0823,1.0,2023-12-21,2024-08-18


In [53]:
# Attach each cow-lactation's first/last record date to every one of its daily rows
df_THI = df_THI.merge(result, how="inner", on=["SE_Number", "LactationNumber"])
df_THI

Unnamed: 0,SE_Number,LactationNumber,StartDate,FirstDateMY,LastDateMY
0,SE-27c3257a-1492,1.0,2022-10-25,2022-10-25,2023-11-19
1,SE-27c3257a-1492,1.0,2022-10-26,2022-10-25,2023-11-19
2,SE-27c3257a-1492,1.0,2022-10-27,2022-10-25,2023-11-19
3,SE-27c3257a-1492,1.0,2022-10-28,2022-10-25,2023-11-19
4,SE-27c3257a-1492,1.0,2022-10-29,2022-10-25,2023-11-19
...,...,...,...,...,...
771747,SE-f454e660-0829,1.0,2024-08-14,2023-11-05,2024-08-18
771748,SE-f454e660-0829,1.0,2024-08-15,2023-11-05,2024-08-18
771749,SE-f454e660-0829,1.0,2024-08-16,2023-11-05,2024-08-18
771750,SE-f454e660-0829,1.0,2024-08-17,2023-11-05,2024-08-18


In [54]:
# Display the insemination table (one StartIns/StopIns window per row) before it is merged with the daily records
df_fert

Unnamed: 0,SE_Number,LactationNumber,CalvingDate,InseminationDate,StartIns,StopIns,CR0
0,SE-064c0cec-1189,7.0,2021-06-24,2021-08-17,2021-08-17,2022-05-25,1.0
2,SE-064c0cec-1189,8.0,2022-05-25,2022-07-06,2022-07-06,2022-08-16,0.0
3,SE-064c0cec-1189,8.0,2022-05-25,2022-08-16,2022-08-16,2022-12-20,0.0
5,SE-30dc5787-1389,5.0,2021-04-26,2021-06-28,2021-06-28,2022-04-11,1.0
7,SE-30dc5787-1389,6.0,2022-04-11,2022-05-27,2022-05-27,2022-06-18,0.0
...,...,...,...,...,...,...,...
4590,SE-f454e660-0785,1.0,2023-03-05,2023-07-29,2023-07-29,2024-03-29,1.0
4591,SE-f454e660-0790,1.0,2023-05-22,2023-07-16,2023-07-16,2023-08-07,0.0
4593,SE-f454e660-0790,1.0,2023-05-22,2023-08-07,2023-08-07,2023-08-28,0.0
4595,SE-f454e660-0790,1.0,2023-05-22,2023-08-28,2023-08-28,2024-04-30,1.0


In [55]:
# Pair every insemination with all daily records (StartDate) of the same cow
# and lactation. Left join: inseminations without any daily record are kept.
df = df_fert.merge(df_THI, how="left", on=["SE_Number", "LactationNumber"])

# Scratch export for manual inspection
df.to_csv("test.csv", index=False)

In [56]:
# Keep only the daily records that fall inside their insemination window
# [StartIns, StopIns]; both ends are inclusive.
#
# StartIns/StopIns originate from a CSV read without date parsing, so they are
# converted explicitly instead of relying on pandas coercing text to dates
# inside the comparison.
start_ins = pd.to_datetime(df["StartIns"])
stop_ins = pd.to_datetime(df["StopIns"])
start_date = pd.to_datetime(df["StartDate"])

# Rows with a missing StartDate (insemination without any daily record after
# the left merge) compare False on both sides and are therefore kept.
# NOTE(review): because both ends are inclusive, a day equal to the next
# insemination date matches two consecutive windows — confirm this is intended.
outside_window = (start_date < start_ins) | (start_date > stop_ins)

col_keep = ["SE_Number", "LactationNumber", "InseminationDate", "StartDate", "CR0"]
df = df.loc[~outside_window & df["InseminationDate"].notna(), col_keep]

# Scratch export for manual inspection
df.to_csv("test.csv", index=False)

Add all StartDates

In [57]:
# Bring back every daily record: days that are not already present in df are
# added with missing InseminationDate and CR0 (outer join).
col_keep = ["SE_Number", "LactationNumber", "StartDate"]
df_THI = df_THI.loc[:, col_keep]

df = df.merge(df_THI, how="outer", on=col_keep)

# Scratch export for manual inspection
df.to_csv("test.csv", index=False)

Add MY and weather data

In [58]:
# Keep only THI, temp and MY data (plus the cow/lactation/date/time keys)
col_keep = ["SE_Number", "LactationNumber", "StartDate", "StartTime", "THI_adj", "MeanTHI_adj", "Temperature", "MeanTemperature", "TotalYield"]

# Re-read the merged file, loading only the columns that are used below
# instead of the full table.
df_THI = pd.read_csv("../Data/MergedData/MY_weather.csv", usecols=col_keep, low_memory=False)

# Debug aid: to match the weather data to a single example cow, uncomment the
# two lines below. Keep them commented out for the full run.
# SE_Number = ["SE-064c0cec-1189"]  # ["SE-5c06d92d-3114"]  # ["SE-a756bc39-1143"]
# df_THI = df_THI[df_THI["SE_Number"].isin(SE_Number)]

# usecols keeps the file's own column order, so reorder explicitly
df_THI = df_THI[col_keep]

# One row per cow, lactation, date and time of day
df_THI = df_THI.drop_duplicates(subset=["SE_Number", "LactationNumber", "StartDate", "StartTime"])
df_THI

Unnamed: 0,SE_Number,LactationNumber,StartDate,StartTime,THI_adj,MeanTHI_adj,Temperature,MeanTemperature,TotalYield
0,SE-27c3257a-1492,1.0,2022-10-25,,,50.931353,,11.675000,
1,SE-27c3257a-1492,1.0,2022-10-26,,,50.415844,,10.433333,
2,SE-27c3257a-1492,1.0,2022-10-27,,,52.105992,,11.766667,
3,SE-27c3257a-1492,1.0,2022-10-28,,,54.750190,,12.541667,
4,SE-27c3257a-1492,1.0,2022-10-29,,,48.208112,,12.233333,
...,...,...,...,...,...,...,...,...,...
1669003,SE-f454e660-0829,1.0,2024-08-17,08:08:00,60.37578,61.155717,16.9,16.891667,11.00
1669004,SE-f454e660-0829,1.0,2024-08-17,15:18:00,63.71412,61.155717,19.2,16.891667,7.31
1669005,SE-f454e660-0829,1.0,2024-08-17,22:25:00,57.81450,61.155717,12.9,16.891667,7.42
1669006,SE-f454e660-0829,1.0,2024-08-18,08:58:00,,55.808700,,11.100000,11.30


Edit THI and MY data

In [59]:
# Where the per-record THI or temperature is missing (cow not milked, or no
# time stamp in the milking record), fall back to that row's daily mean value.
df_THI = df_THI.assign(
    THI_adj=df_THI["THI_adj"].fillna(df_THI["MeanTHI_adj"]),
    Temperature=df_THI["Temperature"].fillna(df_THI["MeanTemperature"]),
)

In [60]:
# Aggregate to one row per cow, lactation and day:
#   MeanTHI_adj - mean of THI_adj over the day's records
#   MeanTemp    - mean of Temperature over the day's records
#   TotalYield  - sum of TotalYield over the day's records
daily = df_THI.groupby(["SE_Number", "LactationNumber", "StartDate"])

df_THI = pd.concat(
    [
        daily["THI_adj"].mean().rename("MeanTHI_adj"),
        daily["Temperature"].mean().rename("MeanTemp"),
        # min_count=1: a day without any yield record stays NaN. A plain sum
        # would report it as 0.0, indistinguishable from a true zero yield.
        daily["TotalYield"].sum(min_count=1),
    ],
    axis=1,
).reset_index()
df_THI

Unnamed: 0,SE_Number,LactationNumber,StartDate,MeanTHI_adj,MeanTemp,TotalYield
0,SE-064c0cec-1189,7.0,2022-01-01,28.198350,-3.100000,30.77
1,SE-064c0cec-1189,7.0,2022-01-02,33.686833,-0.266667,48.22
2,SE-064c0cec-1189,7.0,2022-01-03,37.561110,2.850000,30.53
3,SE-064c0cec-1189,7.0,2022-01-04,31.044000,-0.800000,42.26
4,SE-064c0cec-1189,7.0,2022-01-05,26.800720,-4.000000,38.49
...,...,...,...,...,...,...
771747,SE-f454e660-0829,1.0,2024-08-14,64.076020,17.325000,31.69
771748,SE-f454e660-0829,1.0,2024-08-15,65.716753,21.066667,26.34
771749,SE-f454e660-0829,1.0,2024-08-16,64.264293,19.966667,24.10
771750,SE-f454e660-0829,1.0,2024-08-17,60.634800,16.333333,25.73


Merge into CR0 dataframe

In [61]:
# Make sure the join key holds datetimes on both sides
df = df.assign(StartDate=pd.to_datetime(df["StartDate"]))
df_THI = df_THI.assign(StartDate=pd.to_datetime(df_THI["StartDate"]))

# Inner join: only rows whose cow, lactation and date exist in both tables remain
df = df.merge(df_THI, how="inner", on=["SE_Number", "LactationNumber", "StartDate"])

# Scratch export for manual inspection
df.to_csv("test.csv", index=False)

Save

In [62]:
# Write the final table (CR0 per insemination window plus daily THI,
# temperature and yield) without the row index
df.to_csv(
    "../Data/CR_W_MY.csv",
    index=False,
)