# **1. Company Popularity**

In [1]:
import pandas as pd
import plotly.express as px

# Load the dataset from the provided URL
file_url = "https://raw.githubusercontent.com/satyam26en/JOB/main/Clean_Job_File.csv"
df = pd.read_csv(file_url)

# Count the number of job postings per company.
# value_counts() sorts in descending order, so the first rows are the
# companies with the most postings and the last rows those with the fewest.
company_job_counts = df['company'].value_counts().reset_index()
company_job_counts.columns = ['company', 'job_postings']

# Create an interactive bar chart
fig = px.bar(company_job_counts, x='company', y='job_postings', title='Number of Job Postings per Company', labels={'job_postings': 'Number of Job Postings', 'company': 'Company'})


def _subset_button(label, subset):
    """Build a dropdown button that redraws the bars from the rows of `subset`.

    Both `x` and `y` are restyled together. Restyling only `y` would leave the
    full company list on the x axis, so the new counts would be drawn under
    the wrong company names (e.g. the bottom-10 counts under the top-10 names).
    """
    return dict(
        args=[{
            "x": [subset['company'].tolist()],
            "y": [subset['job_postings'].tolist()],
        }],
        label=label,
        method="restyle",
    )


# Customize the layout for better visibility
fig.update_layout(
    title={
        'text': 'Number of Job Postings per Company',
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    },
    title_font_size=24,
    xaxis_title_font_size=20,
    yaxis_title_font_size=20,
    legend_title_text='Company Ranking',
    legend_title_font_size=20,
    legend_font_size=16,
    legend=dict(
        x=0,
        y=1,
        xanchor='left',
        yanchor='top'
    ),
    # Dropdown that switches between the top 10, bottom 10 and all companies
    updatemenus=[
        dict(
            buttons=[
                _subset_button("Top 10 Companies", company_job_counts.head(10)),
                _subset_button("Bottom 10 Companies", company_job_counts.tail(10)),
                _subset_button("All Companies", company_job_counts),
            ],
            direction="down",
            pad={"r": 10, "t": 10},
            showactive=True,
            x=1,
            xanchor="right",
            y=1.15,
            yanchor="top",
            bgcolor='rgba(255, 255, 255, 0.8)',
            bordercolor='black',
            borderwidth=1
        ),
    ]
)

# Outline every bar with a thin dark border so adjacent bars stay distinguishable
fig.update_traces(marker=dict(line=dict(width=1, color='DarkSlateGrey')),
                  selector=dict(type='bar'))

# Display the interactive chart
fig.show()


In [2]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Load the dataset from the provided URL
file_url = "https://raw.githubusercontent.com/satyam26en/JOB/main/Clean_Job_File.csv"
df = pd.read_csv(file_url)

# Count the number of job postings per company
company_job_counts = df['company'].value_counts().reset_index()
company_job_counts.columns = ['company', 'job_postings']

# Calculate the average number of job postings per company
average_job_postings = company_job_counts['job_postings'].mean()

# Create a histogram for the distribution of job postings per company
histogram = px.histogram(company_job_counts, x='job_postings', nbins=30, title='Distribution of Job Postings per Company')
histogram.update_layout(
    title={
        'text': 'Distribution of Job Postings per Company',
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    },
    xaxis_title='Number of Job Postings',
    yaxis_title='Number of Companies'
)

# Create a box plot for the distribution of job postings per company
box_plot = px.box(company_job_counts, y='job_postings', title='Box Plot of Job Postings per Company')
box_plot.update_layout(
    title={
        'text': 'Box Plot of Job Postings per Company',
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    },
    yaxis_title='Number of Job Postings'
)

# Ranges offered by the dropdowns: (button label, DataFrame.query condition).
# A condition of None means "no filtering".
job_posting_ranges = [
    ("0-10 Job Postings", "job_postings <= 10"),
    ("11-50 Job Postings", "job_postings > 10 and job_postings <= 50"),
    ("51-100 Job Postings", "job_postings > 50 and job_postings <= 100"),
    ("> 100 Job Postings", "job_postings > 100"),
    ("All Job Postings", None),
]


def _range_filters(axis):
    """Build one dropdown button per range, restyling the trace's `axis` data.

    `axis` is the trace attribute that holds the job-posting counts:
    "x" for the histogram (created with x='job_postings') and
    "y" for the box plot (created with y='job_postings').
    """
    buttons = []
    for label, condition in job_posting_ranges:
        subset = company_job_counts if condition is None else company_job_counts.query(condition)
        buttons.append(
            dict(
                args=[{axis: [subset['job_postings'].tolist()]}],
                label=label,
                method="restyle"
            )
        )
    return buttons


# The histogram keeps its data in `x`, so its buttons must restyle `x`;
# restyling `y` would leave the binned values untouched.
histogram_filters = _range_filters("x")
# The box plot keeps its data in `y`.
filters = _range_filters("y")

# Update the histogram layout to include filters
histogram.update_layout(
    updatemenus=[
        dict(
            buttons=histogram_filters,
            direction="down",
            pad={"r": 10, "t": 10},
            showactive=True,
            x=1,
            xanchor="right",
            y=1.15,
            yanchor="top",
            bgcolor='rgba(255, 255, 255, 0.8)',
            bordercolor='black',
            borderwidth=1
        ),
    ]
)

# Update the box plot layout to include filters
box_plot.update_layout(
    updatemenus=[
        dict(
            buttons=filters,
            direction="down",
            pad={"r": 10, "t": 10},
            showactive=True,
            x=1,
            xanchor="right",
            y=1.15,
            yanchor="top",
            bgcolor='rgba(255, 255, 255, 0.8)',
            bordercolor='black',
            borderwidth=1
        ),
    ]
)

# Display the average number of job postings per company.
# A separate figure with no traces is used purely as a carrier for the text.
average_annotation = go.Figure()
average_annotation.add_annotation(
    x=0.5,
    y=1.1,
    text=f"Average Number of Job Postings per Company: {average_job_postings:.2f}",
    showarrow=False,
    font=dict(size=14)
)

# Show the plots
histogram.show()
box_plot.show()
average_annotation.show()


# **3. Company Industry Trends**

In [7]:
import pandas as pd
import plotly.express as px

# Load the dataset from the provided URL
file_url = "https://raw.githubusercontent.com/satyam26en/JOB/main/Clean_Job_File.csv"
df = pd.read_csv(file_url)


def _mock_industry(company):
    """Guess a coarse industry label from keywords in the company name.

    This is a placeholder heuristic for visualization only: the keyword
    checks are case-sensitive substring matches, tried in the order
    'IT' -> 'Bank' -> 'Hospital'. Anything else, including a missing
    (non-string) company name, is labelled 'Other'.
    """
    # Missing names are read by pandas as float NaN; `in` would raise on them.
    if not isinstance(company, str):
        return 'Other'
    if 'IT' in company:
        return 'IT'
    if 'Bank' in company:
        return 'Finance'
    if 'Hospital' in company:
        return 'Healthcare'
    return 'Other'


# Use the dataset's own 'industry' column when it has one; only fall back to
# the mock labels when the column is absent, so real data is never overwritten.
if 'industry' not in df.columns:
    df['industry'] = df['company'].apply(_mock_industry)

# Count the number of job postings per industry
industry_job_counts = df['industry'].value_counts().reset_index()
industry_job_counts.columns = ['industry', 'job_postings']

# Create an interactive pie chart for industry distribution
pie_chart = px.pie(industry_job_counts, values='job_postings', names='industry', title='Industry Distribution of Job Postings')
# Put the percentage and the industry name inside each slice
pie_chart.update_traces(textposition='inside', textinfo='percent+label')
pie_chart.update_layout(
    title={
        'text': 'Industry Distribution of Job Postings',
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    }
)
# Show the interactive chart
pie_chart.show()



In [6]:
import pandas as pd
import plotly.express as px

# Load the dataset from the provided URL
file_url = "https://raw.githubusercontent.com/satyam26en/JOB/main/Clean_Job_File.csv"
df = pd.read_csv(file_url)

# Calculate the average rating for each company
average_ratings = df.groupby('company')['rating'].mean().reset_index()
average_ratings.columns = ['company', 'average_rating']

# Sort the companies by average rating, best-rated first
average_ratings = average_ratings.sort_values(by='average_rating', ascending=False)

# Create an interactive bar chart for company ratings
bar_chart = px.bar(average_ratings, x='company', y='average_rating', title='Average Company Ratings', labels={'average_rating': 'Average Rating', 'company': 'Company'})
bar_chart.update_layout(
    title={
        'text': 'Average Company Ratings',
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    },
    xaxis_title='Company',
    yaxis_title='Average Rating'
)

# Rating ranges offered by the dropdown: (button label, DataFrame.query condition).
# A condition of None means "no filtering".
rating_ranges = [
    ("0-2 Stars", "average_rating <= 2"),
    ("2-3.5 Stars", "average_rating > 2 and average_rating <= 3.5"),
    ("3.5-4.5 Stars", "average_rating > 3.5 and average_rating <= 4.5"),
    ("> 4.5 Stars", "average_rating > 4.5"),
    ("All Ratings", None),
]

# Add filters for different rating ranges.
# Each button restyles `x` and `y` together: replacing only `y` would keep the
# full company list on the x axis, so the filtered ratings would be drawn
# under the names of unrelated (top-rated) companies.
filters = []
for label, condition in rating_ranges:
    subset = average_ratings if condition is None else average_ratings.query(condition)
    filters.append(
        dict(
            args=[{
                "x": [subset['company'].tolist()],
                "y": [subset['average_rating'].tolist()],
            }],
            label=label,
            method="restyle"
        )
    )

# Update the bar chart layout to include filters
bar_chart.update_layout(
    updatemenus=[
        dict(
            buttons=filters,
            direction="down",
            pad={"r": 10, "t": 10},
            showactive=True,
            x=1,
            xanchor="right",
            y=1.15,
            yanchor="top",
            bgcolor='rgba(255, 255, 255, 0.8)',
            bordercolor='black',
            borderwidth=1
        ),
    ]
)

# Show the interactive bar chart
bar_chart.show()
