## Checklist / TODO (short)
- Collect ≥300 images (4:3 aspect ratio, at least 800x600 pixels). Organize them into `data/train/` and `data/test/`.
- Annotate each image with 64 cell labels (0 = no object, 1 = ball, 2 = bat, 3 = stump) and save a CSV with columns: `ImageFileName,TrainOrTest,c01..c64`.
- Extract hand-crafted features per cell (color hist, edge density, HOG, LBP, moments).
- Train baseline classifiers (RandomForest/SVM), evaluate, pick final model and save `model_<teamname>.pkl`.
- Produce final predictions CSV and README describing sources & steps.

In [None]:
# Split an image into a grid of cells (8x8 by default; an 800x600 input gives 100x75-pixel cells)
def split_image_to_cells(img, n_rows=8, n_cols=8):
    """Cut an image into an n_rows x n_cols grid and return the cells.

    Cells are returned as a flat list in row-major order (left to right,
    then top to bottom). Each cell is an independent copy of the matching
    sub-array, so modifying a cell does not touch ``img``.

    Cell height and width are computed with integer division, so any
    leftover rows/columns at the bottom/right edge are not included in any
    cell. For an 800x600 image and the default 8x8 grid, each cell is
    100 pixels wide and 75 pixels high.
    """
    height, width = img.shape[:2]
    step_y, step_x = height // n_rows, width // n_cols

    # Precompute the vertical and horizontal extents of each grid line once.
    row_spans = [slice(i * step_y, (i + 1) * step_y) for i in range(n_rows)]
    col_spans = [slice(j * step_x, (j + 1) * step_x) for j in range(n_cols)]

    # Outer loop over rows, inner loop over columns -> row-major ordering.
    return [img[rs, cs].copy() for rs in row_spans for cs in col_spans]

# Small test (uncomment and provide a path to test)
# img = cv2.imread('path/to/800x600_image.jpg')
# cells = split_image_to_cells(img)
# print(len(cells), cells[0].shape)

In [None]:
# Feature extractor (per cell) - compact, hand-crafted features
from skimage.feature import hog, local_binary_pattern

def extract_features_from_cell(cell, rgb_hist_bins=8, hog_orientations=9):
    """Return a 1D float numpy array of hand-crafted features for one cell.

    ``cell`` is treated as a 3-channel image in OpenCV's BGR channel order
    (it is converted with ``cv2.COLOR_BGR2HSV`` and ``cv2.COLOR_BGR2GRAY``)
    and is cast to ``uint8`` before any processing.

    Features, in output order:
      - Per-channel intensity histograms (in the input's channel order),
        each normalised to sum to 1 -> 3 * rgb_hist_bins
      - Hue histogram over the range [0, 180], normalised
        -> rgb_hist_bins
      - Mean and standard deviation of the grayscale cell -> 2
      - Edge density: fraction of pixels marked as edges by Canny,
        a value in [0, 1] -> 1
      - HOG: the first 12 values of the descriptor, zero-padded when the
        descriptor is shorter -> 12
      - LBP (P=8, R=1, 'uniform'): the first 8 bins of the normalised
        10-bin code histogram -> 8

    With the default arguments the vector has 55 elements.
    """
    feats = []
    # The OpenCV calls below are fed 8-bit data.
    cell = cell.astype(np.uint8)

    # Per-channel histograms (channel 0, 1, 2 of the input as given)
    for ch in range(3):
        hist = cv2.calcHist([cell], [ch], None, [rgb_hist_bins], [0,256]).flatten()
        # Small epsilon avoids division by zero on an all-empty histogram
        hist = hist / (hist.sum() + 1e-9)
        feats.extend(hist.tolist())

    # HSV hue hist (use Hue channel)
    hsv = cv2.cvtColor(cell, cv2.COLOR_BGR2HSV)
    hue = hsv[:,:,0]
    hhist = cv2.calcHist([hue], [0], None, [rgb_hist_bins], [0,180]).flatten()
    hhist = hhist / (hhist.sum() + 1e-9)
    feats.extend(hhist.tolist())

    # Grayscale stats
    gray = cv2.cvtColor(cell, cv2.COLOR_BGR2GRAY)
    feats.append(float(gray.mean()))
    feats.append(float(gray.std()))

    # Edge density (Canny). The edge map marks edge pixels with 255, so
    # summing it would give 255x the fraction; count non-zero pixels instead
    # to get a true density in [0, 1].
    edges = cv2.Canny(gray, 100, 200)
    feats.append(float(np.count_nonzero(edges)) / (gray.size + 1e-9))

    # HOG (take first 12 elements to keep compact; pad with zeros if the
    # descriptor comes back shorter than 12)
    hog_vec = hog(gray, orientations=hog_orientations, pixels_per_cell=(16,16), cells_per_block=(1,1), feature_vector=True)
    hog_take = hog_vec[:12] if len(hog_vec) >= 12 else np.pad(hog_vec, (0, max(0, 12-len(hog_vec))))
    feats.extend(hog_take.tolist())

    # LBP (P=8, R=1, uniform patterns). Uniform LBP with P=8 yields codes
    # 0..9, hence 10 unit-width bins. The histogram is normalised over all
    # 10 bins and only the first 8 are kept to keep the vector small, so the
    # kept values need not sum to 1.
    lbp = local_binary_pattern(gray, P=8, R=1, method='uniform')
    # Explicit bin edges fully define the binning; no `range` argument needed.
    lbp_hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, 11))
    lbp_hist = lbp_hist / (lbp_hist.sum() + 1e-9)
    feats.extend(lbp_hist[:8].tolist())
    return np.array(feats, dtype=float)

# Example: extract features for all cells of one image
def extract_features_for_image(img_path):
    """Load an image from disk and return its per-cell feature matrix.

    The image is read with OpenCV, shrunk to 800x600 when it is larger,
    split into cells with ``split_image_to_cells`` (default grid) and each
    cell is passed to ``extract_features_from_cell``.

    Returns a 2D array with one row per cell: (64, n_features) for the
    default 8x8 grid.

    Raises:
        FileNotFoundError: if OpenCV could not read the image.
        ValueError: if the image is narrower than 800 or shorter than 600
            pixels (upscaling is not allowed).
    """
    image = cv2.imread(str(img_path))
    if image is None:
        raise FileNotFoundError(f'Image not found: {img_path}')

    target_w, target_h = 800, 600
    height, width = image.shape[:2]

    # Only downscaling is permitted, so reject anything below the target
    # size in either dimension before considering a resize.
    if width < target_w or height < target_h:
        raise ValueError('Image smaller than 800x600 — do not scale up. Skip this image.')
    if width != target_w or height != target_h:
        image = cv2.resize(image, (target_w, target_h), interpolation=cv2.INTER_AREA)

    # One feature vector per cell, stacked row-wise in cell order.
    per_cell = list(map(extract_features_from_cell, split_image_to_cells(image)))
    return np.vstack(per_cell)

# Quick usage example (uncomment and set path)
# feats = extract_features_for_image('data/train/IMG_001.jpg')
# print('features per cell shape:', feats.shape)

In [None]:
# Helper: process every image in a directory and write a per-cell feature CSV (one row per cell)
def create_features_csv_for_dir(img_dir, out_csv_path, tag='train'):
    """Extract per-cell features for every entry in a directory and save a CSV.

    Every path matched by ``*`` inside ``img_dir`` is processed in sorted
    order. Entries for which feature extraction raises are reported on
    stdout and skipped, so one bad file does not abort the whole run.

    The CSV has one row per cell with the columns ``ImageFileName``,
    ``TrainOrTest`` (set to ``tag``), ``cell_index`` (1-based) and
    ``f1..fN`` for the feature values. A summary line is printed after
    writing.
    """
    records = []
    for image_path in sorted(Path(img_dir).glob('*')):
        try:
            cell_feats = extract_features_for_image(image_path)  # one row per cell
        except Exception as err:
            # Best-effort: log the failure and move on to the next entry.
            print('Skipping', image_path, '->', err)
            continue

        # Emit one record per cell; feature columns follow the id columns.
        for cell_no, feat_vec in enumerate(cell_feats, start=1):
            record = {
                'ImageFileName': image_path.name,
                'TrainOrTest': tag,
                'cell_index': cell_no,
            }
            record.update({f'f{k}': v for k, v in enumerate(feat_vec, start=1)})
            records.append(record)

    table = pd.DataFrame(records)
    table.to_csv(out_csv_path, index=False)
    print('Wrote', out_csv_path, 'rows=', len(table))

# Example usage (uncomment to run)
# create_features_csv_for_dir(TRAIN_DIR, OUTPUT_DIR / 'train_cell_features.csv', tag='train')

## Next steps (suggested)
- Create `data/train/` and `data/test/` and put images in place. Do not scale up images smaller than 800x600.
- Use `create_features_csv_for_dir` to extract per-cell features for the train set. Then annotate the labels in a CSV and join the features with the labels for training.
- Train classifiers (RandomForest/SVM) on the per-cell features; evaluate and tune.
- When ready, save the model with `joblib.dump(clf, 'model_<teamname>.pkl')` and produce the final predictions CSV in the required format.

Installing icrawler...


NameError: `download_and_prepare` function not found. Ensure you ran the earlier cells that define it.