In [24]:
#Load images from original-images folder and resize them to 800x600 pixels, then save them to resized-images folder
import os
from PIL import Image
# Resize every supported image found in the input folder to 800x600 pixels and
# write the result, under the same file name, into the output folder.
input_folder = 'dataset/original-images'
output_folder = 'dataset/resized-images'
os.makedirs(output_folder, exist_ok=True)

image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')
target_size = (800, 600)

for filename in os.listdir(input_folder):
    # Skip anything that does not carry one of the accepted extensions
    if not filename.endswith(image_extensions):
        continue
    source_path = os.path.join(input_folder, filename)
    destination_path = os.path.join(output_folder, filename)
    with Image.open(source_path) as source_image:
        source_image.resize(target_size).save(destination_path)


In [27]:
## Process each image in the resized-images folder and create hog, lbp, and color histogram features for each 8x8 tile, save to combined_hog_lbp_color_features.csv
import numpy as np
import pandas as pd
from skimage.feature import hog, local_binary_pattern
from skimage import io, color
# Cut every resized image into non-overlapping 8x8 tiles and describe each complete
# tile with HOG, a uniform-LBP histogram and per-channel colour histograms.
# The feature vectors are collected without metadata, one row per tile.
resized_folder = 'dataset/resized-images'
all_data = []

tile_size = 8
image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')

# Uniform LBP with P sampling points produces P + 2 distinct codes (0 .. P + 1).
# The histogram therefore needs P + 2 unit-width bins, i.e. P + 3 integer edges.
# (With only P + 2 edges the two highest codes share the last, closed bin.)
lbp_points = 8
lbp_radius = 1
lbp_bin_edges = np.arange(0, lbp_points + 3)

for filename in os.listdir(resized_folder):
    if not filename.endswith(image_extensions):
        continue
    img_path = os.path.join(resized_folder, filename)
    img_array = np.asarray(io.imread(img_path))

    for i in range(0, img_array.shape[0], tile_size):
        for j in range(0, img_array.shape[1], tile_size):
            tile = img_array[i:i + tile_size, j:j + tile_size]
            # Slices at the image border may be smaller than a full tile; skip them
            if tile.shape[0] != tile_size or tile.shape[1] != tile_size:
                continue

            # Extract HOG features (2x2 grid of 4x4-pixel cells, one cell per block)
            hog_features = hog(tile, orientations=9,
                               pixels_per_cell=(4, 4),
                               cells_per_block=(1, 1),
                               block_norm='L2-Hys',
                               channel_axis=-1)

            # Extract LBP features from the channel-averaged (grayscale) tile
            tile_gray = np.mean(tile, axis=2).astype(np.uint8)
            lbp = local_binary_pattern(tile_gray, P=lbp_points, R=lbp_radius, method='uniform')
            lbp_hist, _ = np.histogram(lbp.ravel(), bins=lbp_bin_edges)
            lbp_hist = lbp_hist.astype("float")
            lbp_hist /= (lbp_hist.sum() + 1e-7)  # epsilon guards against division by zero

            # Extract colour histogram features: 8 bins per channel, each normalised to sum to ~1
            color_hist = []
            for channel in range(tile.shape[2]):
                hist, _ = np.histogram(tile[:, :, channel], bins=8, range=(0, 256))
                hist = hist.astype("float")
                hist /= (hist.sum() + 1e-7)
                color_hist.extend(hist)

            # Combine all features into one flat vector for this tile
            combined_features = np.hstack([hog_features, lbp_hist, color_hist])
            all_data.append(combined_features)

# Convert to DataFrame and save to CSV
df = pd.DataFrame(all_data)
df.to_csv('dataset/combined_hog_lbp_color_features.csv', index=False)

In [None]:
import pandas as pd

# Build one CSV row per complete 8x8 tile of every resized image: the tile's
# position (image name, tile row, tile column) followed by its HOG, uniform-LBP
# histogram and per-channel colour histogram features.

# Create a list to store all data
all_data = []

# Tile geometry
tile_size = 8
image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')

# HOG parameters. An 8x8 tile split into 4x4-pixel cells gives a 2x2 grid of cells;
# with one cell per block and 9 orientations that is 2 * 2 * 9 = 36 values per tile.
# (A single 8x8 cell cannot form a 2x2-cell block, so that configuration yields no
# features at all, and a colour tile needs channel_axis to be declared.)
hog_params = dict(
    orientations=9,
    pixels_per_cell=(4, 4),
    cells_per_block=(1, 1),
    block_norm='L2-Hys',
    channel_axis=-1,
)

# LBP parameters
lbp_radius = 1
lbp_n_points = 8 * lbp_radius
lbp_method = 'uniform'
# Integer edges 0 .. lbp_n_points + 2 give lbp_n_points + 2 unit-width bins
lbp_bin_edges = np.arange(0, lbp_n_points + 3)

# Color histogram parameters
hist_bins = 8  # bins per channel

# Process each image in the resized-images folder
for filename in os.listdir(output_folder):
    if not filename.endswith(image_extensions):
        continue
    img_path = os.path.join(output_folder, filename)
    with Image.open(img_path) as img:
        # Force three channels so grayscale, palette or RGBA files do not break
        # the axis-2 mean and the R/G/B histogram loop below
        img_array = np.array(img.convert('RGB'))

    # Process each 8x8 tile; row_idx / col_idx count tile positions, i / j are pixel offsets
    for row_idx, i in enumerate(range(0, img_array.shape[0], tile_size)):
        for col_idx, j in enumerate(range(0, img_array.shape[1], tile_size)):
            tile = img_array[i:i + tile_size, j:j + tile_size]
            # Slices at the image border may be smaller than a full tile; skip them
            if tile.shape[0] != tile_size or tile.shape[1] != tile_size:
                continue

            # Extract HOG features
            hog_features = hog(tile, **hog_params)

            # Extract LBP features from the channel-averaged (grayscale) tile
            tile_gray = np.mean(tile, axis=2).astype(np.uint8)
            lbp = local_binary_pattern(tile_gray, lbp_n_points, lbp_radius, lbp_method)
            lbp_hist, _ = np.histogram(lbp.ravel(), bins=lbp_bin_edges, density=True)

            # Extract color histogram features (density-normalised, as before)
            color_hist = []
            for channel in range(3):  # R, G, B channels
                hist, _ = np.histogram(tile[:, :, channel], bins=hist_bins,
                                       range=(0, 256), density=True)
                color_hist.extend(hist)

            # Create row with metadata and features
            row_data = {
                'image_name': filename,
                'cell_row': row_idx,
                'cell_column': col_idx,
            }
            row_data.update({f'hog_{k}': value for k, value in enumerate(hog_features)})
            row_data.update({f'lbp_{k}': value for k, value in enumerate(lbp_hist)})
            row_data.update({f'color_{k}': value for k, value in enumerate(color_hist)})

            all_data.append(row_data)

# Create DataFrame and save to CSV
df_combined = pd.DataFrame(all_data)
combined_csv_path = 'dataset/combined_hog_lbp_color_features.csv'
df_combined.to_csv(combined_csv_path, index=False)
print(f"Combined CSV with HOG, LBP, and Color Histogram features saved to {combined_csv_path}")

# Count feature columns from the frame itself rather than from loop leftovers, so the
# summary also works (reporting zeros) when no tile was processed
n_hog, n_lbp, n_color = (
    sum(str(col).startswith(prefix) for col in df_combined.columns)
    for prefix in ('hog_', 'lbp_', 'color_')
)
print(f"Total features per tile: {n_hog} HOG + {n_lbp} LBP + {n_color} Color = {n_hog + n_lbp + n_color}")

Combined CSV with HOG, LBP, and Color Histogram features saved to dataset/combined_hog_lbp_color_features.csv
Total features per tile: 36 HOG + 10 LBP + 24 Color = 70


In [22]:
from IPython.display import display

# Read the combined feature CSV back from disk, print a plain-text preview of the
# first five rows, then render the frame with the notebook's rich table display.
df = pd.read_csv(
    'dataset/combined_hog_lbp_color_features.csv',
)
print(df.head(5))
display(df)

  image_name  cell_row  cell_column     hog_0  hog_1  hog_2  hog_3     hog_4  \
0  IMG_3.jpg         0            0  0.981536    0.0    0.0    0.0  0.191276   
1  IMG_3.jpg         0            1  0.959802    0.0    0.0    0.0  0.280678   
2  IMG_3.jpg         0            2  0.971219    0.0    0.0    0.0  0.238189   
3  IMG_3.jpg         0            3  1.000000    0.0    0.0    0.0  0.000000   
4  IMG_3.jpg         0            4  0.428870    0.0    0.0    0.0  0.428870   

      hog_5    hog_6  ...  color_14  color_15  color_16  color_17  color_18  \
0  0.000000  0.00000  ...       0.0       0.0       0.0   0.00000  0.000000   
1  0.000000  0.00000  ...       0.0       0.0       0.0   0.00000  0.000000   
2  0.000000  0.00000  ...       0.0       0.0       0.0   0.00000  0.020996   
3  0.000000  0.00000  ...       0.0       0.0       0.0   0.00000  0.031250   
4  0.283467  0.42887  ...       0.0       0.0       0.0   0.00293  0.028320   

   color_19  color_20  color_21  color_22  c

Unnamed: 0,image_name,cell_row,cell_column,hog_0,hog_1,hog_2,hog_3,hog_4,hog_5,hog_6,...,color_14,color_15,color_16,color_17,color_18,color_19,color_20,color_21,color_22,color_23
0,IMG_3.jpg,0,0,0.981536,0.000000,0.000000,0.000000,0.191276,0.000000,0.00000,...,0.00000,0.0,0.0,0.00000,0.000000,0.001953,0.029297,0.0,0.0,0.0
1,IMG_3.jpg,0,1,0.959802,0.000000,0.000000,0.000000,0.280678,0.000000,0.00000,...,0.00000,0.0,0.0,0.00000,0.000000,0.006348,0.024902,0.0,0.0,0.0
2,IMG_3.jpg,0,2,0.971219,0.000000,0.000000,0.000000,0.238189,0.000000,0.00000,...,0.00000,0.0,0.0,0.00000,0.020996,0.010254,0.000000,0.0,0.0,0.0
3,IMG_3.jpg,0,3,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.00000,0.0,0.0,0.00000,0.031250,0.000000,0.000000,0.0,0.0,0.0
4,IMG_3.jpg,0,4,0.428870,0.000000,0.000000,0.000000,0.428870,0.283467,0.42887,...,0.00000,0.0,0.0,0.00293,0.028320,0.000000,0.000000,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29995,IMG_6.jpg,74,95,0.309888,0.000000,0.000000,0.000000,0.892899,0.326651,0.00000,...,0.03125,0.0,0.0,0.00000,0.000000,0.000000,0.031250,0.0,0.0,0.0
29996,IMG_6.jpg,74,96,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,0.00000,...,0.03125,0.0,0.0,0.00000,0.000000,0.000000,0.031250,0.0,0.0,0.0
29997,IMG_6.jpg,74,97,0.408002,0.456491,0.456491,0.456491,0.456491,0.000000,0.00000,...,0.03125,0.0,0.0,0.00000,0.000000,0.000000,0.031250,0.0,0.0,0.0
29998,IMG_6.jpg,74,98,0.087370,0.000000,0.000000,0.000000,0.996176,0.000000,0.00000,...,0.03125,0.0,0.0,0.00000,0.000000,0.000000,0.031250,0.0,0.0,0.0


## Next Steps to Build a Machine Learning Model for Cricket Object Detection

Based on the extracted HOG, LBP, and Color Histogram features, here are the recommended steps to build a machine learning model that can classify and detect balls, bats, and stumps in cricket images:

### 1. **Data Labeling**
- Manually annotate each 8x8 tile in your dataset with labels:
    - `ball`, `bat`, `stump`, or `background`
- Create a new column `label` in your CSV file
- Tools: LabelImg, CVAT, or custom Python script

### 2. **Data Preparation**
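- Split data into training (70%), validation (15%), and test (15%) sets, keeping all tiles from one image in the same split so that near-identical neighbouring tiles do not leak between sets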
- Balance classes using techniques like:
    - Oversampling minority classes (SMOTE)
    - Undersampling majority class
    - Class weights during training

### 3. **Feature Engineering**
- Normalize/standardize features using `StandardScaler` or `MinMaxScaler`
- Perform dimensionality reduction if needed (PCA for model inputs; t-SNE for visualization only, since it cannot transform unseen samples)
- Analyze feature importance

### 4. **Model Selection**
Try multiple classifiers:
- **Random Forest**: Good for tabular data
- **XGBoost/LightGBM**: Powerful gradient boosting
- **SVM**: Effective for high-dimensional data
- **Neural Network**: MLP for complex patterns
- **CNN**: Consider using raw image patches instead of hand-crafted features

### 5. **Training & Evaluation**
- Use cross-validation for robust evaluation
- Metrics to track:
    - Accuracy, Precision, Recall, F1-Score
    - Confusion Matrix
    - Per-class performance

### 6. **Post-Processing**
- Apply sliding window approach on test images
- Use Non-Maximum Suppression (NMS) to merge overlapping detections
- Draw bounding boxes around detected objects

### 7. **Advanced Improvements**
- Consider using **deep learning** (YOLO, Faster R-CNN) for better accuracy
- Data augmentation (rotation, flip, brightness)
- Ensemble methods combining multiple models
- Transfer learning with pre-trained models

### 8. **Deployment**
- Save trained model using `joblib` or `pickle`
- Create inference pipeline
- Build API or GUI for real-time detection

Would you like help implementing any of these steps?

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# The dataset has no labels yet, so this cell only loads the feature table and
# reports its size; the actual split is kept below as a commented template.

# Load the features
df_features = pd.read_csv('dataset/combined_hog_lbp_color_features.csv')

# Every column that is neither tile metadata nor a (future) 'label' column is a feature
metadata_cols = ['image_name', 'cell_row', 'cell_column']
non_feature_cols = set(metadata_cols) | {'label'}
feature_cols = [name for name in df_features.columns if name not in non_feature_cols]

# Once you have labels, uncomment and use this:
# X = df_features[feature_cols]
# y = df_features['label']

# # Split into train (70%), validation (15%), test (15%)
# X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.30, random_state=42, stratify=y)
# X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.50, random_state=42, stratify=y_temp)

# # Standardize features
# scaler = StandardScaler()
# X_train_scaled = scaler.fit_transform(X_train)
# X_val_scaled = scaler.transform(X_val)
# X_test_scaled = scaler.transform(X_test)

# print(f"Training set: {X_train.shape[0]} samples")
# print(f"Validation set: {X_val.shape[0]} samples")
# print(f"Test set: {X_test.shape[0]} samples")

# Report how much data is available
print(f"Total tiles: {len(df_features)}")
print(f"Feature columns: {len(feature_cols)}")
print("\nNote: You need to add a 'label' column before splitting the data.")

