In [26]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.cluster import KMeans

# Load your dataset
df = pd.read_csv("sy.csv", encoding='latin1')

# --- Step 1: Clean and Prepare Geo_Location ---
df['Geo_Location'] = df['Geo_Location'].str.replace('(', '', regex=False).str.replace(')', '', regex=False).str.strip()
df[['Latitude', 'Longitude']] = df['Geo_Location'].str.split(',', expand=True)
df['Latitude'] = df['Latitude'].astype(float)
df['Longitude'] = df['Longitude'].astype(float)

# Encode Crime_Location_Type (optional for clustering logic)
le = LabelEncoder()
df['Crime_Location_Encoded'] = le.fit_transform(df['Crime_Location_Type'])

# --- Step 2: Police Deployment Based on Crime Type and Crime_Location_Type ---
df['Head_clean'] = df['Head'].str.lower().str.strip()

# Display crime types
print("\n✅ Available Crime Types:")
print(df['Head'].dropna().unique())

# Input crime type
user_crime_input = input("Enter a crime type: ").lower().strip()

# Match crime
matched_crime = None
for crime in df['Head_clean'].unique():
    if user_crime_input in crime:
        matched_crime = crime
        break

if not matched_crime:
    print("❌ No matching crime type found.")
else:
    filtered_crime_df = df[df['Head_clean'] == matched_crime]
    matched_display = df[df['Head_clean'] == matched_crime]['Head'].iloc[0]
    print(f"\n✅ Interpreted crime type as: {matched_display}")

    top_station = filtered_crime_df['Police Station'].value_counts().idxmax()
    top_station_cases = filtered_crime_df['Police Station'].value_counts().max()
    top_location = filtered_crime_df['Crime_Location_Type'].value_counts().idxmax()
    top_location_cases = filtered_crime_df['Crime_Location_Type'].value_counts().max()

    print(f"\n📍 Police Station with most cases: {top_station} ({top_station_cases} cases)")
    print(f"📌 Location type with most crime: {top_location} ({top_location_cases} cases)")

    fixed_police_per_station = 50
    excluded_locations = ['InHouse']  # Add more if needed

    for station in df['Police Station'].unique():
        station_df = filtered_crime_df[filtered_crime_df['Police Station'] == station]

        if station_df.empty:
            print(f"\n🚓 Police Allocation for {station} (Total Crimes: 0):")
            print("  No crimes of this type reported. Allocate minimum patrol if needed.")
            continue

        location_counts = station_df['Crime_Location_Type'].value_counts().to_dict()
        total_crimes = sum(location_counts.values())

        print(f"\n🚓 Police Allocation for {station} (Total Crimes: {total_crimes}):")

        # Remove excluded locations like 'InHouse'
        deployable_locations = {
            loc: count for loc, count in location_counts.items() if loc not in excluded_locations
        }

        if not deployable_locations:
            print("  ⚠️ All reported crimes are in non-deployable areas like houses. Assign general patrolling if needed.")
            continue

        # Get top 4 deployable locations
        top_locations = dict(sorted(deployable_locations.items(), key=lambda item: item[1], reverse=True)[:4])

        for location_type, count in top_locations.items():
            percent = count / total_crimes
            officers_assigned = round(percent * fixed_police_per_station)
            print(f"  {location_type}: {officers_assigned} officers")

    # --- Step 3: Optional Clustering Without Visualization ---
    clustering_df = filtered_crime_df.dropna(subset=['Latitude', 'Longitude'])

    if not clustering_df.empty and len(clustering_df) >= 3:
        coords = clustering_df[['Latitude', 'Longitude']]
        kmeans = KMeans(n_clusters=3, random_state=42)
        clustering_df['Cluster'] = kmeans.fit_predict(coords)

        # You can use clustering_df['Cluster'] for further logic if needed
    else:
        print("\n⚠️ Not enough data with coordinates to perform clustering.")



✅ Available Crime Types:
['Theft (Auto Theft)' 'Theft (House Theft)' 'Theft (HBT Night)'
 'Assault (Hurt)' 'Drug Related (NDPS)' 'Theft (Chain Snatching)'
 'Theft (HBT Day)' 'Assault (Hurt) ' 'Drug Related (NDPS) '
 'Theft (Robbery)' 'Theft (other  Theft )']



✅ Interpreted crime type as: Assault (Hurt)

📍 Police Station with most cases: Anjuna_Ps (107 cases)
📌 Location type with most crime: InHouse (44 cases)

🚓 Police Allocation for Anjuna_Ps (Total Crimes: 107):
  ClubArea: 9 officers
  localArea: 9 officers
  ResidentialArea: 7 officers
  LocalArea: 6 officers

🚓 Police Allocation for Mapusa_Ps (Total Crimes: 80):
  ResidentialArea: 11 officers
  ReligiousPlace: 8 officers
  LocalArea: 7 officers
  MarketZone: 6 officers

🚓 Police Allocation for Colvale_PS (Total Crimes: 45):
  LocalArea: 11 officers
  ResidentialArea: 10 officers
  IndustrialArea: 6 officers
  Business/Store: 4 officers


In [36]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.cluster import KMeans

# Load dataset
df = pd.read_csv("sy.csv", encoding='latin1')

# Clean and prepare Geo_Location
df['Geo_Location'] = df['Geo_Location'].str.replace('(', '', regex=False).str.replace(')', '', regex=False).str.strip()
df[['Latitude', 'Longitude']] = df['Geo_Location'].str.split(',', expand=True)
df['Latitude'] = df['Latitude'].astype(float)
df['Longitude'] = df['Longitude'].astype(float)

# Extract Month from Date
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
df['Month'] = df['Date'].dt.month_name()

# Encode for clustering
le = LabelEncoder()
df['Crime_Location_Encoded'] = le.fit_transform(df['Crime_Location_Type'])

# Clean crime type names
df['Head'] = df['Head'].astype(str).str.strip().str.replace(r'\s+', ' ', regex=True)
df['Head_clean'] = df['Head'].str.lower().str.strip()

# Display available crime types
print("\nAvailable Crime Types:")
print(df['Head'].dropna().unique())

# Input crime type
user_crime_input = input("Enter a crime type: ").lower().strip()

# Match crime
matched_crime = None
for crime in df['Head_clean'].unique():
    if user_crime_input in crime:
        matched_crime = crime
        break

if not matched_crime:
    print("No matching crime type found.")
else:
    filtered_crime_df = df[df['Head_clean'] == matched_crime]
    matched_display = df[df['Head_clean'] == matched_crime]['Head'].iloc[0]
    print(f"\nInterpreted crime type as: {matched_display}")

    top_station = filtered_crime_df['Police Station'].value_counts().idxmax()
    top_station_cases = filtered_crime_df['Police Station'].value_counts().max()
    top_location = filtered_crime_df['Crime_Location_Type'].value_counts().idxmax()
    top_location_cases = filtered_crime_df['Crime_Location_Type'].value_counts().max()

    print(f"\nPolice Station with most cases: {top_station} ({top_station_cases} cases)")
    print(f"Location type with most crime: {top_location} ({top_location_cases} cases)")

    for station in df['Police Station'].unique():
        station_df = filtered_crime_df[filtered_crime_df['Police Station'] == station]

        print(f"\nPolice Allocation for {station}:")

        if station_df.empty:
            print("  No crimes of this type reported. Allocate minimum patrol if needed.")
            continue

        location_counts = station_df['Crime_Location_Type'].value_counts()
        total_crimes = location_counts.sum()

        top_locations = dict(sorted(location_counts.items(), key=lambda x: x[1], reverse=True)[:4])

        for location_type, count in top_locations.items():
            if location_type.strip().lower() == 'inhouse':
                print(f"  {location_type}: Investigate on a case-by-case basis. No direct deployment.")
            else:
                percent = count / total_crimes
                if percent >= 0.3:
                    print(f"  {location_type}: More staff should be deployed.")
                elif 0.1 <= percent < 0.3:
                    print(f"  {location_type}: Moderate staff deployment recommended.")
                else:
                    print(f"  {location_type}: Fewer staff required.")

        # Peak months analysis
        top_months = (
            station_df['Month'].value_counts().head(3).index.tolist()
        )
        if top_months:
            print("  Peak Months:", ', '.join(top_months))

    # Clustering logic
    clustering_df = filtered_crime_df.dropna(subset=['Latitude', 'Longitude'])
    if not clustering_df.empty and len(clustering_df) >= 3:
        coords = clustering_df[['Latitude', 'Longitude']]
        kmeans = KMeans(n_clusters=3, random_state=42)
        clustering_df['Cluster'] = kmeans.fit_predict(coords)
        # You can use clustering_df['Cluster'] for further logic
    else:
        print("\nNot enough data with coordinates to perform clustering.")



Available Crime Types:
['Theft (Auto Theft)' 'Theft (House Theft)' 'Theft (HBT Night)'
 'Assault (Hurt)' 'Drug Related (NDPS)' 'Theft (Chain Snatching)'
 'Theft (HBT Day)' 'Theft (Robbery)' 'Theft (other Theft )']

Interpreted crime type as: Theft (HBT Day)

Police Station with most cases: Mapusa_Ps (16 cases)
Location type with most crime: InHouse (11 cases)

Police Allocation for Anjuna_Ps:
  InHouse: Investigate on a case-by-case basis. No direct deployment.
  ResidentialArea: Moderate staff deployment recommended.
  Peak Months: July

Police Allocation for Mapusa_Ps:
  InHouse: Investigate on a case-by-case basis. No direct deployment.
  ReligiousPlace: Moderate staff deployment recommended.
  ParkingLot: Fewer staff required.
  MarketZone: Fewer staff required.
  Peak Months: July, June, April

Police Allocation for Colvale_PS:
  ResidentialArea: More staff should be deployed.
  ParkingLot: More staff should be deployed.
  NearCollege: Moderate staff deployment recommended.
  Loc