In [2]:
import json
import matplotlib.pyplot as plt
import numpy as np

# Load the raw log records from the JSON file next to the notebook.
# The encoding is given explicitly so decoding does not depend on the platform's
# default locale encoding (JSON interchange text is UTF-8).
with open('soundcloudLogs.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# Function to group logs by track name and collect counts over time
def group_logs_and_collect_counts(data):
    """Group log records by track name into time-ordered (logTime, counts) samples.

    Args:
        data: Iterable of dicts. Each item may carry a 'name' (falls back to
            'Unknown') and a 'soundcloudLog' dict holding 'logTime' and
            'counts'.

    Returns:
        dict mapping each track name to a list of (logTime, counts) tuples
        sorted by ascending logTime. Records whose 'soundcloudLog' is missing,
        is not a dict, or has no usable 'logTime'/'counts' value are skipped;
        a track without any usable record does not appear in the result.
    """
    counts_by_name = {}
    for item in data:
        log = item.get('soundcloudLog')
        if not isinstance(log, dict):
            continue
        log_time = log.get('logTime')
        count = log.get('counts')
        # A record lacking either field cannot be placed on the time series.
        # Defaulting the log to {} (as before) only postponed the failure to a
        # KeyError while sorting, so such records are dropped here instead.
        if log_time is None or count is None:
            continue
        name = item.get('name', 'Unknown')
        counts_by_name.setdefault(name, []).append((log_time, count))

    for samples in counts_by_name.values():
        # Sort on logTime only; the sort is stable, so records sharing a
        # timestamp keep their input order.
        samples.sort(key=lambda sample: sample[0])

    return counts_by_name

# Function to drop records that are immediately followed by a smaller count
def remove_invalid_records(counts_by_name):
    """Drop every sample whose immediate successor has a smaller count.

    For each track, sample i is kept only when the count of sample i + 1 is
    greater than or equal to its own; the final sample is always kept. Each
    comparison uses the original neighbour (not the previously kept sample),
    so the cleaned series is not guaranteed to be non-decreasing.

    Args:
        counts_by_name: dict mapping a track name to a sequence of
            (log_time, count) pairs.

    Returns:
        A new dict with the same keys, each mapped to the filtered list. The
        input lists are not modified.
    """
    cleaned_counts_by_name = {}
    for name, counts in counts_by_name.items():
        # Pair each sample with the one right after it and keep the earlier
        # sample only if the later count did not fall below it.
        survivors = [
            (log_time, count)
            for (log_time, count), (_, following_count) in zip(counts, counts[1:])
            if following_count >= count
        ]
        # The last sample has no successor to be compared with; an empty
        # series contributes nothing here.
        survivors.extend(counts[-1:])
        cleaned_counts_by_name[name] = survivors

    return cleaned_counts_by_name

# Group the loaded records by track name; each track maps to its
# (logTime, counts) samples ordered by logTime.
counts_by_name = group_logs_and_collect_counts(data)

# Clean the data: drop every sample that is directly followed by a smaller
# count (the last sample of each track is always kept).
cleaned_counts_by_name = remove_invalid_records(counts_by_name)

# Function to calculate gradient and report unusual changes
def analyze_gradients(counts_by_name, std_multiplier=5):
    """Flag samples whose count gradient is unusually steep for their track.

    For every track the gradient of count with respect to log time is computed
    with ``np.gradient``. A sample is reported when its gradient exceeds
    ``mean + std_multiplier * std`` of that track's own gradients.

    Args:
        counts_by_name: dict mapping a track name to a list of
            (log_time, count) pairs, expected in ascending log_time order.
        std_multiplier: Number of standard deviations above the mean gradient
            a sample must exceed to be reported. Defaults to 5.

    Returns:
        dict mapping a track name to a list of (log_time, count, gradient)
        tuples for the flagged samples. Tracks without flagged samples, and
        tracks with fewer than two distinct log times (a warning is printed
        for these), are omitted.
    """
    unusual_changes = {}
    for name, counts in counts_by_name.items():
        log_times = np.array([log_time for log_time, _ in counts])
        log_counts = np.array([count for _, count in counts])

        # Collapse runs of equal timestamps, keeping the last sample of each
        # run. A zero time step makes np.gradient divide by zero, and NumPy
        # reports that as a RuntimeWarning plus inf/nan values, never as
        # ZeroDivisionError; the nan would then poison mean/std and silently
        # disable detection for the whole track.
        if log_times.size >= 2:
            keep = np.append(np.diff(log_times) != 0, True)
            log_times, log_counts = log_times[keep], log_counts[keep]

        # np.gradient needs at least two points on the time axis.
        if log_times.size < 2:
            print(f"Warning: All log_times are constant for '{name}'. Skipping gradient calculation.")
            continue

        gradients = np.gradient(log_counts, log_times)

        # Only spikes above the track's own typical growth are of interest.
        threshold = np.mean(gradients) + std_multiplier * np.std(gradients)
        flagged = np.flatnonzero(gradients > threshold)

        if flagged.size:
            unusual_changes[name] = [
                (log_times[i], log_counts[i], gradients[i]) for i in flagged
            ]

    return unusual_changes


# Run the gradient analysis on the cleaned series
unusual_changes = analyze_gradients(cleaned_counts_by_name)

# Report the flagged samples, one track at a time
for name in unusual_changes:
    changes = unusual_changes[name]
    print(f"Unusual changes for '{name}':")
    for log_time, count, gradient in changes:
        print(f"  Log Time: {log_time}, Count: {count}, Gradient: {gradient}")

# cleaned_counts_by_name keeps the cleaned series, so it can be reused for
# further analysis or written out to a new JSON file


Unusual changes for 'Be Qiafat Nemiad':
  Log Time: 1721869016225, Count: 819204, Gradient: 0.00030702546388641
Unusual changes for 'Butterfly':
  Log Time: 1718765817007, Count: 68371, Gradient: 0.0004962094906690727
Unusual changes for 'Ghatle Amd':
  Log Time: 1721523416225, Count: 1337229, Gradient: 0.001133206013483594
  Log Time: 1721609816225, Count: 1521678, Gradient: 0.0011301273148148147
Unusual changes for 'HICHKI BE JOZ TO II':
  Log Time: 1718328998630, Count: 576598, Gradient: 3.907407407139534e-05
Unusual changes for 'Hasta La Vista':
  Log Time: 1718765817007, Count: 103489, Gradient: 0.0006153761601033363
Unusual changes for 'Khaterate Kohan':
  Log Time: 1717967251465, Count: 2630, Gradient: 3.235221674594349e-07
Unusual changes for 'Nagi Jayi':
  Log Time: 1717967251465, Count: 28035, Gradient: 1.138926211757818e-06
Unusual changes for 'Tehroon':
  Log Time: 1719171785062, Count: 1185979, Gradient: 0.00010142713388968695
Unusual changes for 'Ye Jaye Door':
  Log Time

In [6]:
import json
import matplotlib.pyplot as plt
import numpy as np

# Load the raw log records from the JSON file next to the notebook.
# The encoding is given explicitly so decoding does not depend on the platform's
# default locale encoding (JSON interchange text is UTF-8).
with open('soundcloudLogs.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# Function to group logs by track name and collect counts over time
def group_logs_and_collect_counts(data):
    """Group log records by track name into time-ordered (logTime, counts) samples.

    Args:
        data: Iterable of dicts. Each item may carry a 'name' (falls back to
            'Unknown') and a 'soundcloudLog' dict holding 'logTime' and
            'counts'.

    Returns:
        dict mapping each track name to a list of (logTime, counts) tuples
        sorted by ascending logTime. Records whose 'soundcloudLog' is missing,
        is not a dict, or has no usable 'logTime'/'counts' value are skipped;
        a track without any usable record does not appear in the result.
    """
    counts_by_name = {}
    for item in data:
        log = item.get('soundcloudLog')
        if not isinstance(log, dict):
            continue
        log_time = log.get('logTime')
        count = log.get('counts')
        # A record lacking either field cannot be placed on the time series.
        # Defaulting the log to {} (as before) only postponed the failure to a
        # KeyError while sorting, so such records are dropped here instead.
        if log_time is None or count is None:
            continue
        name = item.get('name', 'Unknown')
        counts_by_name.setdefault(name, []).append((log_time, count))

    for samples in counts_by_name.values():
        # Sort on logTime only; the sort is stable, so records sharing a
        # timestamp keep their input order.
        samples.sort(key=lambda sample: sample[0])

    return counts_by_name

# Function to drop records that are immediately followed by a smaller count
def remove_invalid_records(counts_by_name):
    """Drop every sample whose immediate successor has a smaller count.

    For each track, sample i is kept only when the count of sample i + 1 is
    greater than or equal to its own; the final sample is always kept. Each
    comparison uses the original neighbour (not the previously kept sample),
    so the cleaned series is not guaranteed to be non-decreasing.

    Args:
        counts_by_name: dict mapping a track name to a sequence of
            (log_time, count) pairs.

    Returns:
        A new dict with the same keys, each mapped to the filtered list. The
        input lists are not modified.
    """
    cleaned_counts_by_name = {}
    for name, counts in counts_by_name.items():
        # Pair each sample with the one right after it and keep the earlier
        # sample only if the later count did not fall below it.
        survivors = [
            (log_time, count)
            for (log_time, count), (_, following_count) in zip(counts, counts[1:])
            if following_count >= count
        ]
        # The last sample has no successor to be compared with; an empty
        # series contributes nothing here.
        survivors.extend(counts[-1:])
        cleaned_counts_by_name[name] = survivors

    return cleaned_counts_by_name

# Function to calculate gradient and report unusual changes
def analyze_gradients(counts_by_name, std_multiplier=5):
    """Flag unusually steep count gradients and the share of flagged samples.

    For every track the gradient of count with respect to log time is computed
    with ``np.gradient``. A sample is reported when its gradient exceeds
    ``mean + std_multiplier * std`` of that track's own gradients.

    Args:
        counts_by_name: dict mapping a track name to a list of
            (log_time, count) pairs, expected in ascending log_time order.
        std_multiplier: Number of standard deviations above the mean gradient
            a sample must exceed to be reported. Defaults to 5.

    Returns:
        A tuple ``(unusual_changes, track_probabilities)``:
        ``unusual_changes`` maps a track name to a list of
        (log_time, count, gradient) tuples for the flagged samples;
        ``track_probabilities`` maps the same names to the fraction of the
        analysed samples that were flagged. Tracks without flagged samples,
        and tracks with fewer than two distinct log times (a warning is
        printed for these), appear in neither dict.
    """
    unusual_changes = {}
    track_probabilities = {}

    for name, counts in counts_by_name.items():
        log_times = np.array([log_time for log_time, _ in counts])
        log_counts = np.array([count for _, count in counts])

        # Collapse runs of equal timestamps, keeping the last sample of each
        # run. A zero time step makes np.gradient divide by zero, and NumPy
        # reports that as a RuntimeWarning plus inf/nan values, never as
        # ZeroDivisionError; the nan would then poison mean/std and silently
        # disable detection for the whole track.
        if log_times.size >= 2:
            keep = np.append(np.diff(log_times) != 0, True)
            log_times, log_counts = log_times[keep], log_counts[keep]

        # np.gradient needs at least two points on the time axis.
        if log_times.size < 2:
            print(f"Warning: Not enough data or constant log_times for '{name}'. Skipping gradient calculation.")
            continue

        gradients = np.gradient(log_counts, log_times)

        # Only spikes above the track's own typical growth are of interest.
        threshold = np.mean(gradients) + std_multiplier * np.std(gradients)
        flagged = np.flatnonzero(gradients > threshold)

        if flagged.size:
            unusual_changes[name] = [
                (log_times[i], log_counts[i], gradients[i]) for i in flagged
            ]
            # Share of the analysed samples that were flagged; the size is at
            # least 2 here, so the division is always defined.
            track_probabilities[name] = flagged.size / log_times.size

    return unusual_changes, track_probabilities

# Function to plot stream counts and highlight unusual points for tracks with unusual changes
def plot_stream_counts_with_unusual_points(cleaned_counts_by_name, unusual_changes):
    """Save one JPG line chart per flagged track with its unusual samples marked.

    Files are written to the current working directory as ``soundcloud1.jpg``,
    ``soundcloud2.jpg``, ... following the iteration order of
    ``unusual_changes``.

    Args:
        cleaned_counts_by_name: dict mapping a track name to a list of
            (log_time, count) pairs; must contain every key of
            ``unusual_changes``.
        unusual_changes: dict mapping a track name to a list of
            (log_time, count, gradient) tuples to highlight.

    Raises:
        KeyError: If a track in ``unusual_changes`` is missing from
            ``cleaned_counts_by_name``.
    """
    for idx, (name, unusual_points) in enumerate(unusual_changes.items(), start=1):
        counts = cleaned_counts_by_name[name]
        log_times = [log_time for log_time, _ in counts]
        log_counts = [count for _, count in counts]

        fig, ax = plt.subplots(figsize=(10, 5))
        try:
            ax.plot(log_times, log_counts, label='Stream Counts', color='blue')

            # Highlight unusual points
            unusual_times = [time for time, _, _ in unusual_points]
            unusual_counts = [count for _, count, _ in unusual_points]
            ax.scatter(unusual_times, unusual_counts, color='red', label='Unusual Points')

            ax.set_title(f'Stream Counts for {name}')
            ax.set_xlabel('Log Time')
            ax.set_ylabel('Stream Count')
            ax.legend()

            # Save the plot as a JPG file with a unique name
            fig.savefig(f'soundcloud{idx}.jpg', format='jpg')
        finally:
            # Always release the figure, even when drawing or saving fails;
            # otherwise open figures pile up across loop iterations.
            plt.close(fig)

# Build the per-track time series from the loaded records
counts_by_name = group_logs_and_collect_counts(data)

# Drop the samples that are directly followed by a smaller count
cleaned_counts_by_name = remove_invalid_records(counts_by_name)

# Flag unusual gradients and get the flagged-sample fraction for each affected track
unusual_changes, track_probabilities = analyze_gradients(cleaned_counts_by_name)

# Report that fraction for every track that has unusual changes
for name in track_probabilities:
    probability = track_probabilities[name]
    print(f"Track '{name}' has a fake stream probability of {probability:.2f}")

# Write one chart per affected track, with the flagged samples highlighted
plot_stream_counts_with_unusual_points(cleaned_counts_by_name, unusual_changes)


Track 'Be Qiafat Nemiad' has a fake stream probability of 0.02
Track 'Butterfly' has a fake stream probability of 0.02
Track 'Ghatle Amd' has a fake stream probability of 0.03
Track 'HICHKI BE JOZ TO II' has a fake stream probability of 0.02
Track 'Hasta La Vista' has a fake stream probability of 0.02
Track 'Khaterate Kohan' has a fake stream probability of 0.02
Track 'Nagi Jayi' has a fake stream probability of 0.02
Track 'Tehroon' has a fake stream probability of 0.02
Track 'Ye Jaye Door' has a fake stream probability of 0.02
