# New dataset with a one-hour shift and the 32 new features

In [None]:
# ----------------------------
# 1. Feature Calculation Logic
# ----------------------------

def _window_features(window):
    """Compute the 32 features of a single activity window.

    Parameters
    ----------
    window : np.ndarray
        Numeric samples of one window. The "6h" features use the first 24
        samples split into four chunks of 6 (presumably hourly samples in a
        24-hour window -- confirm against the code that builds the windows).

    Returns
    -------
    dict
        Feature name -> value, always containing all 32 keys in the same
        order. Features that are undefined for a short window are NaN.

    Raises
    ------
    ValueError
        If the window is empty (raised by ``np.min``).
    """
    chunk_len, n_chunks = 6, 4   # four consecutive 6-sample chunks
    n_lags, n_harmonics = 11, 4  # Autocorr1..Autocorr11 and h1..h4

    n = len(window)
    feats = {}

    # --- Time-domain features -------------------------------------------
    feats['Minimum'] = np.min(window)
    feats['Maximum'] = np.max(window)
    feats['Mean'] = np.mean(window)
    feats['RMS'] = np.sqrt(np.mean(np.square(window)))
    feats['STD'] = np.std(window)

    # Chunk statistics are only defined when every chunk holds at least one
    # sample. Feeding an empty chunk to np.std/np.mean yields NaN anyway but
    # also emits RuntimeWarnings, so the NaN is produced explicitly instead.
    if n > (n_chunks - 1) * chunk_len:
        chunks = [window[i * chunk_len:(i + 1) * chunk_len] for i in range(n_chunks)]
        feats['MeanSTD6h'] = np.mean([np.std(chunk) for chunk in chunks])
        feats['STDMean6h'] = np.std([np.mean(chunk) for chunk in chunks])
    else:
        feats['MeanSTD6h'] = np.nan
        feats['STDMean6h'] = np.nan

    # Successive-difference features need at least two samples.
    diffs = np.diff(window)
    if diffs.size:
        feats['STDSD'] = np.std(diffs)
        feats['RMSSD'] = np.sqrt(np.mean(np.square(diffs)))
    else:
        feats['STDSD'] = np.nan
        feats['RMSSD'] = np.nan

    # --- Distribution features ------------------------------------------
    # The window is known to be non-empty here: np.min above would have raised.
    feats['Mode'] = stats.mode(window, keepdims=True)[0][0]
    for q in (10, 90, 25, 50, 75):  # order fixes the column order
        feats[f'Q{q}'] = np.percentile(window, q)
    feats['Skewness'] = stats.skew(window)
    feats['Kurtosis'] = stats.kurtosis(window)

    # --- Autocorrelation (lags 1 to 11) -----------------------------------
    # A correlation needs at least two overlapping pairs; with a single pair
    # np.corrcoef returns NaN and warns, so that case is short-circuited.
    for lag in range(1, n_lags + 1):
        if n > lag + 1:
            feats[f'Autocorr{lag}'] = np.corrcoef(window[:-lag], window[lag:])[0, 1]
        else:
            feats[f'Autocorr{lag}'] = np.nan

    # --- Frequency-domain features (harmonics) ----------------------------
    # Magnitudes of FFT bins 1..4 (bin 0 is skipped). Bins that do not exist
    # for a short window are reported as NaN so every row has h1..h4.
    spectrum = np.abs(fft.fft(window))
    for k in range(1, n_harmonics + 1):
        feats[f'h{k}'] = spectrum[k] if k < len(spectrum) else np.nan

    return feats


def calculate_all_features(activity_windows):
    """Extract the 32 features from every activity window.

    Parameters
    ----------
    activity_windows : iterable of array-like
        One sequence of numeric samples per window, e.g. a DataFrame column
        whose cells hold lists.

    Returns
    -------
    pd.DataFrame
        One row per input window, in input order. When ``activity_windows``
        is a ``pd.Series`` the result carries the same index, so it lines up
        with the source frame in index-aligned operations such as
        ``pd.concat(axis=1)``; otherwise a default RangeIndex is used.

    Notes
    -----
    A window that cannot be processed (e.g. an empty one) is reported on
    stdout and becomes an all-NaN row built from the module-level
    ``FEATURE_NAMES`` list, so the row count always matches the input.
    """
    rows = []

    for window in activity_windows:
        try:
            rows.append(_window_features(np.array(window)))
        except Exception as e:
            # Best effort: keep the row so positions still match the input.
            print(f"Error processing window: {e}")
            rows.append({f: np.nan for f in FEATURE_NAMES})

    # Keep the caller's row labels instead of silently renumbering them.
    index = activity_windows.index if isinstance(activity_windows, pd.Series) else None
    return pd.DataFrame(rows, index=index)

# Canonical feature column order: 17 time-domain/distribution features,
# then the 11 autocorrelation lags, then the 4 FFT harmonics (32 in total).
# This matches the order in which calculate_all_features fills each row.
FEATURE_NAMES = (
    [
        'Minimum', 'Maximum', 'Mean', 'RMS', 'STD',
        'MeanSTD6h', 'STDMean6h', 'STDSD', 'RMSSD', 'Mode',
        'Q10', 'Q90', 'Q25', 'Q50', 'Q75',
        'Skewness', 'Kurtosis',
    ]
    + [f'Autocorr{lag}' for lag in range(1, 12)]
    + [f'h{harmonic}' for harmonic in range(1, 5)]
)

# ----------------------------
# 2. Apply Feature Extraction
# ----------------------------

# Each cell of the 'activity_window' column yields one row of features.
features_df = calculate_all_features(shifted_df.loc[:, 'activity_window'])



  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [None]:
# List the columns of shifted_df (the frame the activity windows were read from).
shifted_df.columns

Index(['cow', 'start_time', 'end_time', 'duration_hours', 'activity_window',
       'oestrus', 'calving', 'lameness', 'mastitis', 'other_disease', 'OK'],
      dtype='object')

In [None]:
# ----------------------------
# 3. Combine with Original Data
# ----------------------------

# The raw windows are replaced by their extracted features in the output.
shifted_df_cleaned = shifted_df.drop(columns=['activity_window'])
# Leading identifier/metadata columns; every other remaining column is
# treated as a label column and placed after the features.
features_columns = ['cow', 'start_time', 'end_time', 'duration_hours']
label_columns = [col for col in shifted_df_cleaned.columns if col not in features_columns]

# pd.concat(axis=1) aligns rows on index labels, not on position. The feature
# rows were produced in the row order of shifted_df, so they are re-labelled
# with its index first; otherwise a non-default index on shifted_df (e.g.
# after filtering without reset_index) would pair features with the wrong
# rows or introduce all-NaN rows. set_axis raises if the row counts differ.
final_df = pd.concat([
    shifted_df_cleaned[features_columns],
    features_df[FEATURE_NAMES].set_axis(shifted_df_cleaned.index, axis=0),
    shifted_df_cleaned[label_columns]
], axis=1)

# ----------------------------
# 4. Save and Display
# ----------------------------

# Written relative to the notebook's working directory, without the index.
output_path = "32features_1hour_shift_1.csv"
final_df.to_csv(output_path, index=False)

print(f"\n✅ Processing complete! Results saved to {output_path}")
print(f"📊 Final dataset shape: {final_df.shape}")
print("\n🔍 First 3 rows of the final dataset:")
display(final_df.head(3))


✅ Processing complete! Results saved to 32features_1hour_shift_1.csv
📊 Final dataset shape: (39754, 42)

🔍 First 3 rows of the final dataset:


Unnamed: 0,cow,start_time,end_time,duration_hours,Minimum,Maximum,Mean,RMS,STD,MeanSTD6h,...,h1,h2,h3,h4,oestrus,calving,lameness,mastitis,other_disease,OK
0,151,2015-03-02 01:00:00,2015-03-03 00:00:00,24,-821.62874,1471.43776,266.806858,909.703731,869.698211,582.102664,...,9054.881839,2749.134118,5402.793031,5328.034727,0,0,0,0,0,1
1,151,2015-03-02 02:00:00,2015-03-03 01:00:00,24,-821.62874,1471.43776,260.727509,914.461769,876.505272,688.865117,...,9200.190073,2875.733548,5292.373492,5310.124693,0,0,0,0,0,1
2,151,2015-03-02 03:00:00,2015-03-03 02:00:00,24,-821.62874,1471.43776,260.574873,914.598511,876.693317,746.26021,...,9203.629436,2877.617696,5292.140348,5313.078588,0,0,0,0,0,1


In [None]:
# Preview the first 24 rows of the combined dataset.
display(final_df.iloc[:24])


Unnamed: 0,cow,start_time,end_time,duration_hours,Minimum,Maximum,Mean,RMS,STD,MeanSTD6h,...,h1,h2,h3,h4,oestrus,calving,lameness,mastitis,other_disease,OK
0,151,2015-03-02 01:00:00,2015-03-03 00:00:00,24,-821.62874,1471.43776,266.806858,909.703731,869.698211,582.102664,...,9054.881839,2749.134118,5402.793031,5328.034727,0,0,0,0,0,1
1,151,2015-03-02 02:00:00,2015-03-03 01:00:00,24,-821.62874,1471.43776,260.727509,914.461769,876.505272,688.865117,...,9200.190073,2875.733548,5292.373492,5310.124693,0,0,0,0,0,1
2,151,2015-03-02 03:00:00,2015-03-03 02:00:00,24,-821.62874,1471.43776,260.574873,914.598511,876.693317,746.26021,...,9203.629436,2877.617696,5292.140348,5313.078588,0,0,0,0,0,1
3,151,2015-03-02 04:00:00,2015-03-03 03:00:00,24,-821.13265,1471.43776,262.275842,913.107192,874.629137,729.288874,...,9170.273156,2877.205955,5265.248174,5275.739072,0,0,0,0,0,1
4,151,2015-03-02 05:00:00,2015-03-03 04:00:00,24,-827.99977,1471.43776,241.827879,926.062472,893.930075,732.949766,...,9491.396989,2667.298899,5754.926014,5349.922811,0,0,0,0,0,1
5,151,2015-03-02 06:00:00,2015-03-03 05:00:00,24,-827.99977,1471.43776,231.884563,933.153749,903.883548,640.289696,...,9602.092796,2487.385133,5936.09696,5173.045504,0,0,0,0,0,1
6,151,2015-03-02 07:00:00,2015-03-03 06:00:00,24,-827.99977,1471.43776,233.088141,934.176781,904.630409,594.098979,...,9595.503918,2515.556461,5933.575014,5200.682914,0,0,0,0,0,1
7,151,2015-03-02 08:00:00,2015-03-03 07:00:00,24,-827.99977,1471.43776,218.765402,914.203682,887.642987,682.144507,...,9590.270481,2189.753674,5718.064976,5133.466697,0,0,0,0,0,1
8,151,2015-03-02 09:00:00,2015-03-03 08:00:00,24,-827.99977,1471.43776,144.999946,909.194634,897.557741,742.649737,...,9295.548347,1715.586493,3969.596025,6597.874933,0,0,0,0,0,1
9,151,2015-03-02 10:00:00,2015-03-03 09:00:00,24,-827.99977,1471.43776,179.451341,903.354889,885.351496,731.060344,...,9599.769253,1312.119233,4676.902163,5899.619695,0,0,0,0,0,1
