# Extract changed chunks

With this script, the IDs (or chunk coordinates) of changed chunks from a Minecraft change trace can be extracted.

In [1]:
import csv
import sys
import pandas as pd

Please specify the name of the changelog file to read (and of the output file to write) in the Python code below:

In [5]:
# Tab-separated change trace that is read by the parsing cell below.
inputfilename = "changelog-distributed-players.csv"
# Tab-separated file that the per-tick list of changed chunks is written to.
outputfilename = "ChunkChanges-distributed.csv"

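The following code parses the changelog file and groups its change entries by tick in a dictionary that maps each tick number to the list of change rows recorded for that tick (the chunk ID is the eighth column of each row). Status rows are skipped.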

In [6]:
# Group the change entries of the trace by world tick.
#
# Results of this cell:
#   intervalQueue -- dict mapping tick number (int) to the list of CSV rows
#                    (each row a list of str) logged for that tick
#   intervalKeys  -- the tick numbers in order of first appearance
#
# newline='' lets the csv module do its own line-ending handling, as its
# documentation requires for files passed to csv.reader.
with open(inputfilename, 'r', newline='') as f:
    # The file starts with three header lines that describe the row layouts:
    #   <timestamp> time type="block"  xpos ypos zpos world chunk section material skylight emittedLight BlockData
    #   <timestamp> time type="entity" xpos ypos zpos world chunk section uuid [changed attributes]
    #   <timestamp> time type="status" #loadedChunks #changedChunks #tileEntities #changedTileEntities
    #               #entities #changedEntities #onlinePlayers totalStateDiffTime
    headerlen = 3
    for i in range(headerlen):
        # The default keeps a file shorter than the header from raising StopIteration.
        next(f, None)

    # Interval settings that nothing in this cell reads; they are kept as-is
    # in case another cell of the notebook refers to them.
    status_line_count = 0  # running counter intended to measure interval length
    interval_size = 2  # number of 0.5 s state diffs merged into one interval (2 -> one second)
    world_filter = []

    intervalQueue = {}

    reader = csv.reader(f, delimiter="\t")  # sequentially read all remaining lines
    for row in reader:
        if not row:
            # csv.reader yields [] for a blank line (e.g. a trailing newline).
            continue
        if row[2] == "status":
            # Status rows carry per-diff counters, not an individual change.
            continue
        worldTimeTick = int(row[1])
        intervalQueue.setdefault(worldTimeTick, []).append(row)

    # Dicts preserve insertion order, so this is the order of first appearance.
    intervalKeys = list(intervalQueue)

The following code creates a tab-separated CSV file that lists, for every tick, the semicolon-separated IDs of the chunks that changed in that tick.

In [7]:
# Write one line per tick: the tick number and the ';'-joined IDs of the
# chunks that changed during that tick.
intervalKeys = sorted(intervalKeys)

# Column 7 of every change row holds the chunk ID. The IDs of a tick are
# de-duplicated through a set and then sorted, so that the output file is
# identical from run to run (set iteration order for strings is not stable
# across interpreter runs).
chunks_per_tick = [
    ';'.join(sorted({row[7] for row in intervalQueue[key]}))
    for key in intervalKeys
]

# Build the frame in one go instead of growing it row by row with .loc,
# which reallocates the frame on every insert.
df = pd.DataFrame(
    {"chunks": chunks_per_tick},
    index=pd.Index(intervalKeys, name="tick"),
)

df.to_csv(outputfilename, sep='\t')