In [1]:
%matplotlib notebook
import pandas as pd
import os
from datetime import datetime

## Defining spark-rats export info

In [2]:
# Root directory that is listed for simulation exports (one entry per export).
export_path = "../export"
# Names of the parquet datasets read from every export entry.
df_files = ["border_info.parquet","infecteds_history.parquet","infected_tiles.parquet"]

# os.listdir() returns entries in arbitrary order; sort them so that positional
# access into the list of simulations is deterministic across runs and machines.
# NOTE(review): entry names are presumably zero-padded timestamps, in which case
# the lexicographic order is also chronological -- confirm against the exporter.
dates = sorted(os.listdir(export_path))
dates

['2020-07-27 04:44']

## Reading spark-rats data

In [3]:
# strptime pattern used to parse export directory names into datetimes.
date_file_format = "%Y-%m-%d %H:%M"

def df_info_to_dict(df):
    """Return the row labelled ``0`` of ``df`` as a ``{column: value}`` dict.

    The frame is transposed so each original row becomes a column, converted
    to a nested dict keyed by row label, and the entry for label ``0`` is
    returned. A ``KeyError`` is raised when no row carries the label ``0``.
    """
    transposed = df.T
    rows_by_label = transposed.to_dict()
    return rows_by_label[0]

def read_stat_df(parquet_stat, date_dir, export_dir="../export"):
    """Load one exported parquet dataset of a simulation.

    Parameters
    ----------
    parquet_stat : str
        Name of the dataset directory inside the simulation export
        (e.g. ``"infected_tiles.parquet"``).
    date_dir : str
        Name of the simulation directory; it must match ``date_file_format``
        because it is parsed into the ``creation_date`` value.
    export_dir : str, optional
        Root directory that contains the simulation directories.

    Returns
    -------
    pandas.DataFrame or dict
        The dataset with an added ``creation_date`` column. A dataset with
        exactly one row is returned as a plain ``{column: value}`` dict
        (including ``creation_date``) instead of a DataFrame.

    Raises
    ------
    FileNotFoundError
        If the dataset directory contains no ``*.parquet`` part file.
    ValueError
        If ``date_dir`` does not match ``date_file_format``.
    """
    parquet_dir = f"{export_dir}/{date_dir}/{parquet_stat}"

    # Only one part file is read, so the Spark job is expected to repartition
    # each stats dataset to a single file before saving it. Sorting keeps the
    # choice deterministic should several part files be present anyway.
    parquet_files = sorted(f for f in os.listdir(parquet_dir) if f.endswith(".parquet"))
    if not parquet_files:
        raise FileNotFoundError(f"no .parquet part file found in {parquet_dir}")
    df = pd.read_parquet(f"{parquet_dir}/{parquet_files[0]}")

    # Derive the timestamp from this call's own `date_dir` argument so the
    # function does not depend on any notebook-level loop variable.
    df['creation_date'] = datetime.strptime(date_dir, date_file_format)

    # cast single row info dataframes to dict
    if len(df) == 1:
        df = df_info_to_dict(df)
    return df

In [4]:
# Maps each export directory name to a dict holding its parsed "date" plus one
# entry per dataset name listed in `df_files`.
simulations = {}
for d in dates:
    simulation_date = datetime.strptime(d, date_file_format)
    simulations[d] = {"date": simulation_date}
    for dff in df_files:
        # Pass the configured export root explicitly so that changing
        # `export_path` is honoured here as well as in the directory listing.
        simulations[d][dff] = read_stat_df(dff, d, export_dir=export_path)

In [5]:
# Keys of `simulations`, in insertion order.
sims_dates = [*simulations]

# One print call; the newline separator reproduces the line-by-line layout,
# with the standalone "\n" item yielding the two blank lines between sections.
print("sim_dates:", sims_dates, "\n", "df_files:", df_files, sep="\n")

sim_dates:
['2020-07-27 04:44']


df_files:
['border_info.parquet', 'infecteds_history.parquet', 'infected_tiles.parquet']


In [None]:
# Show the border info of the last listed simulation. A fixed position such as
# [3] raises IndexError whenever fewer than four exports are available.
simulations[sims_dates[-1]]["border_info.parquet"]

In [None]:
# Pick the simulation with the latest parsed "date" rather than a fixed list
# position, which is out of range when only one export exists and does not
# depend on the order in which the exports were listed.
last_simulation = max(simulations.values(), key=lambda sim: sim["date"])

## Printing infected tiles

In [None]:
# Infected-tiles dataset of the selected simulation; the plotting cell below
# reads it through the name `df`.
df = last_simulation["infected_tiles.parquet"]
# Preview the first three rows.
df.iloc[:3]

In [None]:
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import numpy as np

# 3D axes placed in the first slot of a 1x2 subplot grid.
fig = plt.figure(figsize=(8, 3))
ax1 = fig.add_subplot(1, 2, 1, projection='3d')

# Bars are anchored at the tile coordinates and start at height zero; their
# height is taken from the `infected` column.
x = df.tile_x
y = df.tile_y
top = df.infected
bottom = np.zeros_like(top)
# Footprint of every bar along the x and y axes.
width = 2
depth = 2

ax1.bar3d(x, y, bottom, width, depth, top, shade=True)
ax1.set_title('Infecteds Areas')

## Infected Tiles - evolution 

In [None]:
# One frame per simulation, each tagged with the simulation key it came from.
# DataFrame.assign returns a new frame, so the tables stored in `simulations`
# are not modified as a side effect of building the combined table.
frames = [
    simulations[s]["infected_tiles.parquet"].assign(creation_str=s)
    for s in sims_dates
]

# Stack all simulations and order the rows by creation date. reset_index()
# keeps the previous row labels in an "index" column, as before.
all_infected_tiles = (
    pd.concat(frames)
    .sort_values("creation_date", ascending=True)
    .reset_index()
)

In [None]:
# Preview the first three rows of the combined table.
all_infected_tiles.iloc[:3]

In [None]:
import plotly.express as px

# Marker-size column derived from `infected`.
# NOTE(review): the multiplication by 1 presumably turns boolean flags into
# 0/1 integers for plotly's `size` argument -- confirm the column dtype.
all_infected_tiles['infected_px'] = all_infected_tiles['infected'] * 1

# Animated scatter: one animation frame per `creation_str` value, markers
# placed at the tile coordinates and sized by `infected_px`.
scatter_options = dict(
    x="tile_x",
    y="tile_y",
    animation_frame="creation_str",
    size="infected_px",
    hover_name="infected",
    size_max=10,
    range_x=[0, 200],
    range_y=[0, 200],
)
px.scatter(all_infected_tiles, **scatter_options)

In [None]:
# Display the `infecteds_history.parquet` entry of the selected simulation.
history_key = 'infecteds_history.parquet'
last_simulation[history_key]