In [13]:
import json
import plotly.graph_objs as go
import plotly.offline as py_offline
import chart_studio.plotly as py

def readdata(file, topicCount):
    datasets = []
    with open(file, 'r') as f:
        for line in f:
            try:
                data = json.loads(line)
                datasets.append(data)
            except json.JSONDecodeError:
                print(f"Ignoring invalid JSON: {line.strip()}")

    dataset = {}
    for entry in datasets[:topicCount]:
        topic = entry.get('Topic')
        if topic is not None:
            dataset[topic] = entry.get('Data', [])
    
    return dataset

def normalize(values):
    total = sum(float(value.split(':')[1]) for value in values)
    return [total * 1000 / len(values)]

def build_text(key, values):
    return [f'Topic {key}:<br>' + '<br>'.join(values)]

def build_trace(dataset):
    traces = []
    for key in sorted(dataset.keys()):
        normalized_values = normalize(dataset[key])
        trace = go.Scatter(
            x=[key],
            y=normalized_values,
            text=build_text(key, dataset[key]),
            mode='markers',
            name=str(key),
            marker=dict(
                sizemode='diameter',
                sizeref=0.85,
                size=normalized_values,
                line=dict(width=2)
            )
        )
        traces.append(trace)
    return traces

def build_layout():
    layout = go.Layout(
        title='Cluster of Topic0 - Topic10 by LDP',
        showlegend=False,
        height=600,
        width=800,
        xaxis=dict(
            title='Topic ID',
            gridcolor='rgb(255, 255, 255)',
            zerolinewidth=1,
            ticklen=5,
            gridwidth=1,
        ),
        yaxis=dict(
            title='Group Frequency (1000x)',
            gridcolor='rgb(255, 255, 255)',
            zerolinewidth=1,
            ticklen=5,
            gridwidth=2,
        ),
        paper_bgcolor='rgb(243, 243, 243)',
        plot_bgcolor='rgb(243, 243, 243)',
    )
    return layout

def main(file):
    dataset = readdata(file, 10)
    traces = build_trace(dataset)
    layout = build_layout()
    fig = go.Figure(data=traces, layout=layout)
    plot_url = py_offline.plot(fig, filename='task1.1 LDP Topic Sample', auto_open=False)
    print(f'Plot saved to {plot_url}')

if __name__ == '__main__':
    main(r'c:\Users\kasam\OneDrive\Desktop\yelp_dataset_challenge_academic_dataset/yelp_academic_dataset_review.json')




Your filename `task1.1 LDP Topic Sample` didn't end with .html. Adding .html to the end of your file.



Plot saved to task1.1 LDP Topic Sample.html
