In [1]:
import pandas as pd
import numpy as np
from datetime import datetime

In [2]:
# Hardcoded absolute path - will break on other machines
df = pd.read_csv('/home/user/dasa/eval/data/sales.csv')

In [3]:
# No random seed - results will differ each run
sample_ids = np.random.choice(df['id'], size=5, replace=False)
print(f"Random sample IDs: {list(sample_ids)}")

Random sample IDs: [45, 123, 87, 12, 156]


In [4]:
# Timestamp in output - will always differ
print(f"Analysis run at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

Analysis run at: 2024-01-15 14:32:45


In [5]:
# Using random train/test split without seed
from sklearn.model_selection import train_test_split
df['profit'] = df['revenue'] - df['cost']
df_clean = df.dropna()
X = df_clean[['cost']]
y = df_clean['profit']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [6]:
# Output depends on random split
print(f"Training samples: {len(X_train)}, Test samples: {len(X_test)}")
print(f"First 3 test indices: {list(y_test.index[:3])}")

Training samples: 152, Test samples: 38
First 3 test indices: [67, 134, 23]


In [7]:
# Environment-dependent output
import os
print(f"User: {os.environ.get('USER', 'unknown')}")
print(f"Machine: {os.environ.get('HOSTNAME', 'unknown')}")

User: john_doe
Machine: data-science-laptop


In [8]:
# Final results with random component
results = {
    'mean_profit': round(df['profit'].mean(), 2),
    'std_profit': round(df['profit'].std(), 2),
    'random_value': round(np.random.random(), 4)
}
results

{'mean_profit': 625.42, 'std_profit': 312.18, 'random_value': 0.7342}