In [None]:
# Read the dairy-cows CSV into the working DataFrame `df`
file_path = "/mnt/data/dairy_cows.csv"
df = pd.read_csv(filepath_or_buffer=file_path)

# Lookup tables for the transformations requested by the user: raw labels -> category codes, ordinal labels -> numeric scores
# Raw hazard label -> hazard category code (H_FN, H_HE or H_MH).
# Labels are matched verbatim when the table is applied with Series.map(),
# so the trailing spaces inside some labels are significant and must stay.
hazard_map = {
    # --- H_FN ---
    **dict.fromkeys(
        [
            "Underfeeding",
            "Poor pasture quality",
            "Poor quality feed (low nutritive value)",
            "Poor quality feed (pathogens/toxins)",
            "Too few feeding places ",
            "Too few feeding places indoors",
            "Insufficient access to water (broken system, poor management)",
            "Insufficient access to water (inappropriate system design)",
        ],
        "H_FN",
    ),
    # --- H_HE ---
    **dict.fromkeys(
        [
            "Insufficient space",
            "Poor cubicle design",
            "Poor stall design / stall too small",
            "Inadequate bedding (cubicle/stall floor)",
            "Fewer cubicles than cows",
            "Inadequate flooring in passageways, feeding and milking areas",
            "Continuous housing for long periods",
            "Inadequate maintenance of housing",
            "Insufficient protection from weather",
            "Excessive noise (constant or sudden)",
            "Lack of environmental enrichment",
            "Inadequate design of waiting area (size, flooring, crowding gates)",
            "Being tied up temporarily",
            "Being tied without exercise for long periods ",
            "Use of cow trainers",
            "Inadequate milking parlour design",
            "Inadequate milking robot design",
            "Lack of space for exercise and social interactions",
            "Poor calving conditions (pen design)",
            "Poor calving conditions (absence of pen)",
            "Inadequate housing design",
        ],
        "H_HE",
    ),
    # --- H_MH ---
    **dict.fromkeys(
        [
            "Early separation of cow and calf",
            "Early separation of cow and calf (after 24h)",
            "Mixing animals from different groups ",
            "Lack of facilities for sick animals",
            "Milking robot breakdown",
            "Milking robot not used by cow",
            "Inadequate milking equipment",
            "Lack of handling/restraining facilities",
            "Poor calving conditions (calving management)",
            "Delayed calving intervention",
            "Difficulty calving because of the sire",
            "Pasture access",
        ],
        "H_MH",
    ),
}
# Raw welfare-consequence label -> consequence category code
# (C_HD, C_BW, C_SR or C_M). Labels are matched verbatim by Series.map().
consequence_map = {
    # --- C_HD ---
    **dict.fromkeys(
        [
            "Mastitis",
            "Locomotor disorders (including lameness)",
            "Metabolic disorders",
            "Soft tissue lesions and integument damage",
            "Skin disorders (other than soft tissue lesions and integument damage)",
            "Bone lesions (incl. fractures and dislocations)",
            "Gastro-enteric disorders",
            "Reproductive disorders",
            "Teat disorders",
            "Injuries",
        ],
        "C_HD",
    ),
    # --- C_BW ---
    **dict.fromkeys(
        [
            "General disruption of behaviour",
            "Inability to perform comfort behaviour",
            "Restriction of movement",
            "Resting problems",
            "Impaired maintenance behaviour",
            "Impaired social behaviour",
            "Prolonged hunger",
            "Inability to chew and/or ruminate",
            "Time budgets (disruption of behaviour)",
            "Time budgets (lack of rest)",
            "Time budgets (prevention of behaviour)",
            "Time budgets (reduced behavioural repertoire)",
            "Frequency of lying bouts",
            "Duration of lying down movement",
            "Deviation from normal, uninterrupted getting up movement",
        ],
        "C_BW",
    ),
    # --- C_SR ---
    **dict.fromkeys(
        [
            "Separation stress",
            "Environmental stress",
            "Group (social) stress",
            "Handling stress",
            "Heat stress",
            "Cold stress",
            "Isolation stress",
        ],
        "C_SR",
    ),
    # --- C_M ---
    "Mortality": "C_M",
}
# Raw welfare-indicator label -> indicator category code (I_HP or I_BW).
# Labels are matched verbatim by Series.map(): the misspellings and the
# trailing spaces inside some labels are deliberate copies of the keys as
# originally written and must not be "tidied up" here.
indicator_map = {
    # --- I_HP ---
    **dict.fromkeys(
        [
            "Milk somatic cell count (bulk or individual)",
            "Hot, red, painful udder",
            "Distended udder",
            "Metabolic disorders",
            "Hock alterations",
            "Knee alterations",
            "Lesions of the claw",
            "Neck injuries",
            "Teat disorders",
            "Disease",
            "Body condition scoring",
            "Physiological stress indicators",
            "Respiratory rate and panting ",
        ],
        "I_HP",
    ),
    # --- I_BW ---
    **dict.fromkeys(
        [
            "Abdominal discomfort",
            "Agonistic behaviour",
            "Agonistic interactions",
            "Allo-grooming",
            "Altered resting posture",
            "Amount of eye white",
            "Brush use",
            "Calving behaviour (difficult/long calving)",
            "Calving records (death of cow)",
            "Calving records (difficulty calving)",
            "Calving records (duration of calving)",
            "Coat condiition / cleanliness",
            "Cow Pain Scale (attention towards surroundings, head position, ear position, facial expression, response to approach, back position, lameness)",
            "Deviation from normal, uninterrupted getting up movement",
            "Duration of lying down movement",
            "Frequency of lying bouts",
            "Gait assessment",
            "Huddling",
            "Impaired calving behavour",
            "Impaired maintenance behaviour",
            "Impaired social behaviour",
            "Increased standing",
            "Injuries",
            "Lying behaviour synchronisation",
            "Lying time",
            "Self-grooming",
            "Shivering",
            "Speed",
            "Step activity",
            "Sunburn injury",
            "Time budgets (disruption of behaviour)",
            "Time budgets (increased standing, decreased lying)",
            "Time budgets (lack of rest)",
            "Time budgets (prevention of behaviour)",
            "Time budgets (reduced behavioural repertoire)",
            "Time spent looking out of pen",
            "Vocalisations  ",
            "Walking distance",
            "Weight loss (adults) or restricted weight gain (calves)",
        ],
        "I_BW",
    ),
}
# Ordinal label -> numeric score tables (1 = lowest score, 3 = highest).
# Note the differing middle labels: 'Moderate' for impact and ease,
# 'Medium' for resources.
impact_map = dict(High=3, Moderate=2, Low=1)
ease_map = dict(Difficult=1, Moderate=2, Easy=3)
resources_map = dict(High=3, Medium=2, Low=1)

# Derive the three category-code columns from their raw label columns.
# Series.map() yields NaN for any label that is not a key of the table.
for _target, _source, _lookup in (
    ('Hazard_Category', 'Welfare_Hazards_Animal', hazard_map),
    ('Consequence_Category', 'Welfare_Hazards_Consequences', consequence_map),
    ('Indicator_Category', 'Welfare_Indicator', indicator_map),
):
    df[_target] = df[_source].map(_lookup)

# Re-encode the ordinal label columns in place as numeric scores.
# The raw labels are overwritten, so running this a second time on the same
# df would map the scores themselves and turn every value into NaN.
for _column, _lookup in (
    ('Welfare_Hazards_Impact', impact_map),
    ('Ease_of_Hazard_Mitigation', ease_map),
    ('Indicator_Ease', ease_map),
    ('Indicator_Resources', resources_map),
):
    df[_column] = df[_column].map(_lookup)

# Selecting numerical columns for PCA
numerical_columns = ['Welfare_Hazards_Impact', 'Ease_of_Hazard_Mitigation', 'Indicator_Ease', 'Indicator_Resources']
df_numerical = df[numerical_columns]

# These columns were produced with Series.map(), which leaves NaN wherever a
# label was absent from its lookup table or the cell was empty.
# StandardScaler passes NaN through, but PCA raises ValueError on NaN input,
# so run the decomposition on complete rows only and report how many rows
# were left out instead of failing inside scikit-learn.
df_complete = df_numerical.dropna()
n_excluded = len(df_numerical) - len(df_complete)
if n_excluded:
    print(
        f"PCA: excluding {n_excluded} of {len(df_numerical)} rows "
        "with missing or unmapped scores"
    )
if df_complete.empty:
    raise ValueError("No rows with a complete set of scores are available for PCA")

# Standardizing data
scaler = StandardScaler()
df_scaled = scaler.fit_transform(df_complete)

# Performing PCA (no n_components given, so every component is kept)
pca = PCA()
pca_result = pca.fit_transform(df_scaled)

# Creating a DataFrame for the PCA results. The source row labels are reused
# as the index so the scores can be joined back onto df even when some rows
# were excluded above.
pca_df = pd.DataFrame(
    pca_result,
    columns=[f"PC{i + 1}" for i in range(pca_result.shape[1])],
    index=df_complete.index,
)

# Share of total variance captured by each principal component
explained_variance = pca.explained_variance_ratio_

# Line chart of that share against the 1-based component number
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(
    range(1, len(explained_variance) + 1),
    explained_variance,
    linestyle='--',
    marker='o',
)
ax.set_title('Explained Variance by Principal Components')
ax.set_xlabel('Number of Principal Components')
ax.set_ylabel('Variance Explained')
plt.show()

# Show the component-score table to the user.
# NOTE(review): only pca_df is passed; despite the title, the
# explained-variance ratios are not part of the displayed table.
import ace_tools as tools

tools.display_dataframe_to_user(
    dataframe=pca_df,
    name="PCA Results and Explained Variance",
)
