In [1]:
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import HoverTool, Span, ColumnDataSource
import os
import pandas as pd
import numpy as np

In [2]:
# Route Bokeh output to the current notebook.
# This cell runs once, ahead of the plotting cell that calls show().
output_notebook()

In [3]:
# Directory whose filenames are analysed.
# Set the AUDIO_DIR environment variable to point the notebook at another
# location without editing this cell; when it is unset, the original
# hard-coded path is used, so existing behaviour is unchanged.
directory = os.environ.get(
    'AUDIO_DIR',
    '/home/manuj/Productivity_System/04 BIN/04 RA RP SP/TRAVEL/BIN/AUDIO',
)

In [4]:
# List the entries of the target directory (bare names, not full paths).
filenames = os.listdir(directory)

# Keep the entries whose first three characters form a number and convert
# that prefix to int.
# str.isdecimal() is used rather than str.isdigit(): isdigit() also returns
# True for characters such as superscript digits ('²'), which int() rejects
# with ValueError, so a single oddly named file would abort the whole cell.
numbers = [int(file[:3]) for file in filenames if file[:3].isdecimal()]

# Build a frequency table: one row per distinct prefix value ('Values') with
# the number of filenames carrying it ('Frequency'), most frequent first.
# Naming the index and the counts explicitly (instead of overwriting
# df.columns positionally) keeps the result independent of how the installed
# pandas version labels the output of value_counts().reset_index().
df = (
    pd.DataFrame(numbers, columns=['Values'])['Values']
    .value_counts()
    .rename_axis('Values')
    .reset_index(name='Frequency')
)

# Mean number of files per distinct prefix; NaN when no filename matched.
mean_freq = df['Frequency'].mean()

In [5]:
# df.sort_values(by=['Values'])

In [6]:
# Bokeh reads the plotted columns ('Values', 'Frequency') from a ColumnDataSource.
source = ColumnDataSource(df)

# Bars are centred on their x value, so a range of exactly (min, max) would
# cut the first and last bar in half. Pad the range by half a unit on each
# side; float() turns the numpy scalars returned by pandas into plain floats.
x_start = float(df['Values'].min()) - 0.5
x_end = float(df['Values'].max()) + 0.5

# Create the figure without a toolbar; the hover tool is attached separately below.
# `height` is used instead of `plot_height`, which was removed in Bokeh 3.0.
p = figure(
    x_range=(x_start, x_end),
    height=400,
    title="Histogram of First 3 Letters of Filenames",
    toolbar_location=None,
    tools="",
)

# One vertical bar per distinct prefix value, as tall as its frequency.
p.vbar(x='Values', top='Frequency', width=0.9, source=source, legend_label="Frequency")

# Dashed red horizontal line marking the mean frequency.
mean_line = Span(location=mean_freq, dimension='width', line_color='red', line_dash='dashed', line_width=3)
p.add_layout(mean_line)

# Hover tooltip showing the prefix value and its frequency for the bar under the cursor.
hover = HoverTool(tooltips=[('Value', '@Values'), ('Frequency', '@Frequency')])
p.add_tools(hover)

# Move the legend to the upper left corner
p.legend.location = "top_left"

# Render the figure.
show(p)