In [3]:
import csv
import numpy as np


In [4]:
# Function to read data from a CSV file
def read_data(filename):
    """
    Reads data from a CSV file and converts it to a NumPy array.
    """
    data = []
    with open(filename, 'r') as file:
        csv_reader = csv.reader(file)
        for row in csv_reader:
            float_row = [float(value) for value in row]
            data.append(float_row)
    print(f"Loaded data from {filename}, first 5 rows:")
    print(data[:5])  # Print first 5 rows to understand the data
    return np.array(data)

In [5]:
data = read_data("data.csv")


Loaded data from data.csv, first 5 rows:
[[0.7496479976957946, 0.12819048391570637, 0.5076381014014266, 0.05633511666653959], [0.44826179933101046, 0.4750940081369834, 0.48067829922330835, 0.4689345410349135], [0.2835202505681379, 0.19877751823737527, 0.2654026327645302, 0.21699513681930743], [0.44636990929998566, 0.45871148112970805, 0.28401326607572297, 0.545493074928983], [0.3194047937631325, 0.4225527456813415, 0.25951749505245775, 0.4024668981803643]]


In [6]:
# Separate data into previous and next video metrics
previous_videos = data[:, 0:2]  # Columns 0 and 1
next_videos = data[:, 2:4]      # Columns 2 and 3
print("Previous video metrics (first 5 rows):", previous_videos[:5])
print("Next video metrics (first 5 rows):", next_videos[:5])

Previous video metrics (first 5 rows): [[0.749648   0.12819048]
 [0.4482618  0.47509401]
 [0.28352025 0.19877752]
 [0.44636991 0.45871148]
 [0.31940479 0.42255275]]
Next video metrics (first 5 rows): [[0.5076381  0.05633512]
 [0.4806783  0.46893454]
 [0.26540263 0.21699514]
 [0.28401327 0.54549307]
 [0.2595175  0.4024669 ]]


Steps to Calculate the Transformation Matrix
	1.	Input Data:
	•	 X  (previous video metrics): A matrix where each row represents the metrics of one video.
	•	 Y  (next video metrics): A matrix where each row represents the metrics of the next video.
	2.	Regression Objective:
We solve for  T  such that:

Y = X \cdot T

	3.	Matrix Operations:
	•	Compute  X^T \cdot X : This is the matrix multiplication of the transpose of  X  with  X .
	•	Compute  X^T \cdot Y : This is the matrix multiplication of the transpose of  X  with  Y .
	•	Solve for  T :

T = (X^T \cdot X)^{-1} \cdot (X^T \cdot Y)

Here,  (X^T \cdot X)^{-1}  is the inverse of  X^T \cdot X .

In [7]:
# Function to create the transformation matrix
def create_transformation_matrix(p_videos, n_videos):
    """
    Calculates the transformation matrix using least squares regression.
    """
    X, Y = p_videos, n_videos  # X: current metrics, Y: next metrics
    XtX = X.T @ X  # Transpose of X multiplied by X
    XtY = X.T @ Y  # Transpose of X multiplied by Y
    tm = np.linalg.inv(XtX) @ XtY  # Solve for the transformation matrix
    print("Transformation matrix:\n", tm)
    return tm


In [8]:
# Create the transformation matrix
tm = create_transformation_matrix(previous_videos, next_videos)

Transformation matrix:
 [[0.70500624 0.19902547]
 [0.09087316 0.89926622]]


In [9]:

# Create the transformation matrix
tm = create_transformation_matrix(previous_videos, next_videos)

# Function to predict metrics after k weeks
def predict(A, x0, k):
    """
    Predicts future metrics using the transformation matrix and eigen decomposition.
    """
    eigenvalues, eigenvectors = np.linalg.eig(A)  # Eigen decomposition
    future_state = (
        eigenvectors @ np.diag(eigenvalues**k) @ np.linalg.inv(eigenvectors) @ x0
    )
    return future_state

# Load the creators' current metrics
creators = read_data("creators.csv")
print("Creators' current metrics (first 5 rows):", creators[:5])

# Predict metrics after 4 weeks
weeks = 4
final_state = np.array([predict(tm, creator, weeks) for creator in creators])
print("Predicted metrics after 4 weeks:\n", final_state[:5])  # Print first 5 rows

# Find the creator with the highest technical depth
highest_tech = np.argmax(final_state[:, 0])
print("Creator with highest technical depth:", highest_tech)

# Find the creator with the highest entertainment value
highest_entertainment = np.argmax(final_state[:, 1])
print("Creator with highest entertainment value:", highest_entertainment)

# Identify creators who switched focus
print("Creators who switched focus:")
for i, (old_metrics, new_metrics) in enumerate(zip(creators, final_state)):
    if np.argmin(old_metrics) != np.argmin(new_metrics):  # Check for focus switch
        print(f"Creator {i}: Old: {old_metrics}, New: {new_metrics}")

Transformation matrix:
 [[0.70500624 0.19902547]
 [0.09087316 0.89926622]]
Loaded data from creators.csv, first 5 rows:
[[0.8103015902145745, 0.03150057642800011], [0.8607483731258487, 0.14886163966006555], [1.020411790521508, 0.2727977611029603], [0.7283802954156753, 0.10593426891185549], [0.7593392079309808, 0.14846287627400037]]
Creators' current metrics (first 5 rows): [[0.81030159 0.03150058]
 [0.86074837 0.14886164]
 [1.02041179 0.27279776]
 [0.7283803  0.10593427]
 [0.75933921 0.14846288]]
Predicted metrics after 4 weeks:
 [[0.26622667 0.18151529]
 [0.33224444 0.27706815]
 [0.43514358 0.39878714]
 [0.27256698 0.21983252]
 [0.30044443 0.25693862]]
Creator with highest technical depth: 6
Creator with highest entertainment value: 26
Creators who switched focus:
Creator 5: Old: [0.7395112  0.37027072], New: [0.38929419 0.41499934]
Creator 6: Old: [0.92225941 0.47581368], New: [0.49151281 0.52780568]
Creator 8: Old: [0.63578949 0.2707897 ], New: [0.3143209  0.32207848]
Creator 14: Ol