In [None]:
# -------------------------------
# 1. Import Required Libraries
# -------------------------------
import pandas as pd
import re
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# -------------------------------
# 2. Load Dataset
# -------------------------------
# Read the jobs CSV into the working DataFrame used by every later step.
df = pd.read_csv(filepath_or_buffer="/content/Jobs.csv")

# -------------------------------
# 3. Data Cleaning
# -------------------------------
# Remove the literal "??" marker from the salary text, then trim whitespace.
# regex=False is required for portability: read as a regular expression,
# "??" is invalid ("nothing to repeat"), and pandas versions before 2.0
# treat multi-character patterns as regex by default.
# NOTE(review): "??" is presumably a mis-encoded currency symbol - confirm
# against the raw CSV.
df["salary"] = df["salary"].str.replace("??", "", regex=False).str.strip()

# Fill missing locations first so every row holds a string, then trim it.
df["location"] = df["location"].fillna("Not Specified").str.strip()

# Trim stray whitespace so identical titles/companies are counted together.
df["Type_of_job"] = df["Type_of_job"].str.strip()
df["company_name"] = df["company_name"].str.strip()

# -------------------------------
# 4. Salary Processing
# -------------------------------
def extract_salary(salary_text):
    """Pull a (min, max) salary pair out of free-form salary text.

    The input is converted with ``str()`` and scanned for numeric tokens
    (digits with an optional decimal part).

    Returns:
        ``(low, high)`` as floats when exactly two numbers are found,
        ``(value, value)`` when exactly one number is found, and
        ``(None, None)`` when there are no numbers or more than two.
    """
    matches = re.findall(r"\d+\.?\d*", str(salary_text))

    # Anything other than one or two numbers is treated as unparseable.
    if not 1 <= len(matches) <= 2:
        return None, None

    # With a single match, first and last are the same token.
    low = float(matches[0])
    high = float(matches[-1])
    return low, high

# Parse every salary string once and build both bound columns in a single
# DataFrame construction. This avoids creating one pd.Series per row (slow
# on large frames); astype(float) guarantees numeric columns with NaN for
# unparseable salaries even if no row yields a number.
df[["min_salary", "max_salary"]] = pd.DataFrame(
    df["salary"].map(extract_salary).tolist(),
    index=df.index,
    columns=["min_salary", "max_salary"],
).astype(float)

# Row-wise midpoint of the two bounds; NaN when both bounds are missing.
df["avg_salary"] = df[["min_salary", "max_salary"]].mean(axis=1)

# Keep only rows with a usable salary for the salary statistics and histogram.
df_clean = df.dropna(subset=["avg_salary"])

# -------------------------------
# 5. Analysis
# -------------------------------
# Frequency tables over all rows: value_counts() orders by descending count,
# so head(5) keeps the five most common values of each column.
top_locations = (
    df["location"]
    .value_counts()
    .head(5)
)
top_jobs = (
    df["Type_of_job"]
    .value_counts()
    .head(5)
)
exp_dist = (
    df["experience"]
    .value_counts()
    .head(5)
)

# Salary statistics use only the rows that have a parsed average salary.
min_salary, max_salary, mean_salary = (
    df_clean["avg_salary"].min(),
    df_clean["avg_salary"].max(),
    df_clean["avg_salary"].mean(),
)

# -------------------------------
# 6. Interactive Dashboard
# -------------------------------
# 2x2 grid: two bar charts on the top row, a pie chart and a histogram below.
fig = make_subplots(
    rows=2,
    cols=2,
    specs=[
        [dict(type="bar"), dict(type="bar")],
        [dict(type="pie"), dict(type="histogram")],
    ],
    subplot_titles=(
        "Top 5 Job Locations",
        "Top 5 Job Titles",
        "Experience Distribution",
        "Salary Distribution",
    ),
)

# Add all four traces in one call; rows/cols give each trace's grid cell,
# in the same order as the trace list.
fig.add_traces(
    [
        # (1, 1) Top locations
        go.Bar(
            name="Locations",
            x=top_locations.index,
            y=top_locations.values,
            marker_color="skyblue",
        ),
        # (1, 2) Top job titles
        go.Bar(
            name="Job Titles",
            x=top_jobs.index,
            y=top_jobs.values,
            marker_color="orange",
        ),
        # (2, 1) Experience distribution
        go.Pie(
            name="Experience",
            labels=exp_dist.index,
            values=exp_dist.values,
        ),
        # (2, 2) Salary distribution
        go.Histogram(
            name="Salary (LPA)",
            x=df_clean["avg_salary"],
            nbinsx=20,
            marker_color="green",
        ),
    ],
    rows=[1, 1, 2, 2],
    cols=[1, 2, 1, 2],
)

# Figure-wide layout: centered title, legend on, fixed canvas size.
fig.update_layout(
    title=dict(text="📊 Interactive Jobs Data Dashboard", x=0.5),
    showlegend=True,
    width=1000,
    height=800,
)

fig.show()

# -------------------------------
# 7. Summary Report
# -------------------------------
# Headline and row count.
print("\n📌 SUMMARY REPORT")
print(f"Total Jobs Scraped: {len(df)}")

# Frequency tables; the space after each newline matches print()'s default
# separator between a label and the table.
print(f"\nTop 5 Locations:\n {top_locations}")
print(f"\nTop 5 Job Titles:\n {top_jobs}")
print(f"\nExperience Distribution:\n {exp_dist}")

# Salary statistics, one per line, rounded to two decimals.
print(
    "\nSalary Insights (LPA):",
    f"Minimum Salary: {min_salary:.2f} LPA",
    f"Maximum Salary: {max_salary:.2f} LPA",
    f"Average Salary: {mean_salary:.2f} LPA",
    sep="\n",
)



📌 SUMMARY REPORT
Total Jobs Scraped: 5805

Top 5 Locations:
 location
Mumbai            766
Bangalore         749
Work from home    703
Delhi             595
Gurgaon           419
Name: count, dtype: int64

Top 5 Job Titles:
 Type_of_job
Business Development Executive    393
Business Development Associate    197
Graphic Designer                  155
Business Development Manager      152
Sales Executive                   117
Name: count, dtype: int64

Experience Distribution:
 experience
0-2 years    3559
0-5 years     898
1-3 years     504
4-6 years     277
3-5 years      98
Name: count, dtype: int64

Salary Insights (LPA):
Minimum Salary: 2.00 LPA
Maximum Salary: 75.60 LPA
Average Salary: 3.63 LPA
