In [35]:
import csv
import random
from datetime import datetime, timedelta
import json

# Generate data for 1000 characters
NUM_ROWS = 1000

# Create the CSV file
OUTPUT_FILE = "troop_movements.csv"

# Load home world data from JSON file
with open("home_worlds.json") as json_file:
    home_worlds = json.load(json_file)

# Generate data rows
data_rows = []
for i in range(1, NUM_ROWS + 1):
    # Generate random values for each column
    timestamp = datetime.now() - timedelta(seconds=i)
    unit_id = i
    unit_type = random.choice(
        ["stormtrooper", "tie_fighter", "at-st", "x-wing", "resistance_soldier", "at-at", "tie_silencer", "unknown"]
    )
    empire_or_resistance = random.choice(["empire", "resistance"])
    location_x = random.randint(1, 10)
    location_y = random.randint(1, 10)
    destination_x = random.randint(1, 10)
    destination_y = random.randint(1, 10)

    # Select a random home world from the available options
    home_world = random.choice(home_worlds)
    home_world_name = home_world["name"]

    # Create the data row
    data_row = [
        timestamp.strftime("%Y-%m-%d %H:%M:%S"),
        unit_id,
        unit_type,
        empire_or_resistance,
        location_x,
        location_y,
        destination_x,
        destination_y,
        home_world_name,
    ]

    # Add the data row to the list
    data_rows.append(data_row)

# Write the data to the CSV file
with open(OUTPUT_FILE, "w", newline="") as file:
    writer = csv.writer(file)
    writer.writerow(
        ["timestamp", "unit_id", "unit_type", "empire_or_resistance", "location_x", "location_y", "destination_x",
         "destination_y", "homeworld"]
    )
    writer.writerows(data_rows)

print("Data generation complete.")


Data generation complete.


Counts of Empire vs Resistance:
      Faction  Count
0      empire  482.0
1  resistance  518.0

Counts of Characters by Homeworld:
       Homeworld  Count
0       Alderaan   25.0
1    Aleen Minor   26.0
2     Bestine IV   16.0
3          Cerea   17.0
4       Champala   30.0
5      Chandrila   18.0
6   Concord Dawn   18.0
7       Corellia   20.0
8        Dagobah   22.0
9       Dathomir   32.0
10         Dorin   21.0
11        Eriadu   28.0
12   Glee Anselm   18.0
13    Haruun Kal   26.0
14       Iktotch   30.0
15      Iridonia   30.0
16         Kalee   23.0
17      Kashyyyk   26.0
18     Malastare   29.0
19        Mirial   30.0
20      Mon Cala   22.0
21    Muunilinst   20.0
22         Naboo   30.0
23          Ojom   19.0
24       Quermia   26.0
25         Rodia   21.0
26        Ryloth   23.0
27       Serenno   24.0
28         Shili   25.0
29         Skako   15.0
30       Socorro   24.0
31       Stewjon   19.0
32       Sullust   16.0
33      Tatooine   25.0
34       Tholoth   26.0
35   

In [41]:
import pandas as pd


file_path = "troop_movements.csv"
chunk_size = 10000


empire_vs_resistance_counts = pd.Series(dtype=int)
homeworld_counts = pd.Series(dtype=int)
unit_type_counts = pd.Series(dtype=int)
data_chunks = []


try:
    for chunk in pd.read_csv(file_path, chunksize=chunk_size):
     
        data_chunks.append(chunk)
        
      
        empire_vs_resistance_counts = empire_vs_resistance_counts.add(chunk['empire_or_resistance'].value_counts(), fill_value=0)

    
        homeworld_counts = homeworld_counts.add(chunk['homeworld'].value_counts(), fill_value=0)

     
        unit_type_counts = unit_type_counts.add(chunk['unit_type'].value_counts(), fill_value=0)
        
        
        chunk['is_resistance'] = chunk['empire_or_resistance'] == 'resistance'
        
   
        print("\nCurrent chunk unit type counts:")
        print(chunk['unit_type'].value_counts())

except Exception as e:
    print(f"An error occurred while reading the CSV file in chunks: {e}")
    data_chunks = []


if data_chunks:
    df = pd.concat(data_chunks)

   
    empire_vs_resistance = empire_vs_resistance_counts.reset_index()
    empire_vs_resistance.columns = ['Faction', 'Count']
    print("Counts of Empire vs Resistance:")
    print(empire_vs_resistance)

    characters_by_homeworld = homeworld_counts.reset_index()
    characters_by_homeworld.columns = ['Homeworld', 'Count']
    print("\nCounts of Characters by Homeworld:")
    print(characters_by_homeworld)

    characters_by_unit_type = unit_type_counts.reset_index()
    characters_by_unit_type.columns = ['Unit Type', 'Count']
    print("\nCounts of Characters by Unit Type:")
    print(characters_by_unit_type)

    df['is_resistance'] = df['empire_or_resistance'] == 'resistance'
    print("\nData with the new 'is_resistance' feature:")
    print(df.head())

else:
    print("The DataFrame could not be loaded. Please check the CSV file.")



Current chunk unit type counts:
unit_type
x-wing                152
at-at                 126
resistance_soldier    126
at-st                 124
stormtrooper          123
unknown               118
tie_fighter           116
tie_silencer          115
Name: count, dtype: int64
Counts of Empire vs Resistance:
      Faction  Count
0      empire  482.0
1  resistance  518.0

Counts of Characters by Homeworld:
       Homeworld  Count
0       Alderaan   25.0
1    Aleen Minor   26.0
2     Bestine IV   16.0
3          Cerea   17.0
4       Champala   30.0
5      Chandrila   18.0
6   Concord Dawn   18.0
7       Corellia   20.0
8        Dagobah   22.0
9       Dathomir   32.0
10         Dorin   21.0
11        Eriadu   28.0
12   Glee Anselm   18.0
13    Haruun Kal   26.0
14       Iktotch   30.0
15      Iridonia   30.0
16         Kalee   23.0
17      Kashyyyk   26.0
18     Malastare   29.0
19        Mirial   30.0
20      Mon Cala   22.0
21    Muunilinst   20.0
22         Naboo   30.0
23          Ojom

In [57]:
import pandas as pd

# Define the file path and chunk size
file_path = "troop_movements.csv"
chunk_size = 10000

# Initialize empty Series for the final concatenated results
empire_vs_resistance_counts = pd.Series(dtype=int)
homeworld_counts = pd.Series(dtype=int)
unit_type_counts = pd.Series(dtype=int)
data_chunks = []

# Read the CSV file in chunks
try:
    for chunk in pd.read_csv(file_path, chunksize=chunk_size):
        # Append chunk to list for later concatenation
        data_chunks.append(chunk)
        
        # 1. Update empire vs resistance counts
        empire_vs_resistance_counts = empire_vs_resistance_counts.add(chunk['empire_or_resistance'].value_counts(), fill_value=0)

        # 2. Update homeworld counts
        homeworld_counts = homeworld_counts.add(chunk['homeworld'].value_counts(), fill_value=0)

        # 3. Update unit type counts
        unit_type_counts = unit_type_counts.add(chunk['unit_type'].value_counts(), fill_value=0)
        
        # 4. Engineer a new feature called is_resistance
        chunk['is_resistance'] = chunk['empire_or_resistance'] == 'resistance'
        
        # Print the current unit type counts for debugging
        print("\nCurrent chunk unit type counts:")
        print(chunk['unit_type'].value_counts())

except Exception as e:
    print(f"An error occurred while reading the CSV file in chunks: {e}")
    data_chunks = []

# Concatenate all chunks into a single DataFrame
if data_chunks:
    df = pd.concat(data_chunks)

    # Display results
    empire_vs_resistance = empire_vs_resistance_counts.reset_index()
    empire_vs_resistance.columns = ['Homeworld', 'Count']
    print("\nCounts of Empire vs Resistance:")
    print(empire_vs_resistance)

    characters_by_homeworld = homeworld_counts.reset_index()
    characters_by_homeworld.columns = ['Homeworld', 'Count']
    print("\nCounts of Characters by Homeworld:")
    print(characters_by_homeworld)

    characters_by_unit_type = unit_type_counts.reset_index()
    characters_by_unit_type.columns = ['Unit Type', 'Count']
    print("\nCounts of Characters by Unit Type:")
    print(characters_by_unit_type)
    print("\n")


    # Engineer the new 'is_resistance' feature for the entire DataFrame
    df['is_resistance'] = df['empire_or_resistance'] == 'resistance'
    print("\nData with the new 'is_resistance' feature:")
    print(df[['timestamp', 'unit_id','unit_type','empire_or_resistance','location_x','location_y',
    'destination_x','destination_y','homeworld','is_resistance']].head(10))
    print("\n")

else:
    print("The DataFrame could not be loaded. Please check the CSV file.")



Current chunk unit type counts:
unit_type
x-wing                152
at-at                 126
resistance_soldier    126
at-st                 124
stormtrooper          123
unknown               118
tie_fighter           116
tie_silencer          115
Name: count, dtype: int64

Counts of Empire vs Resistance:
    Homeworld  Count
0      empire  482.0
1  resistance  518.0

Counts of Characters by Homeworld:
       Homeworld  Count
0       Alderaan   25.0
1    Aleen Minor   26.0
2     Bestine IV   16.0
3          Cerea   17.0
4       Champala   30.0
5      Chandrila   18.0
6   Concord Dawn   18.0
7       Corellia   20.0
8        Dagobah   22.0
9       Dathomir   32.0
10         Dorin   21.0
11        Eriadu   28.0
12   Glee Anselm   18.0
13    Haruun Kal   26.0
14       Iktotch   30.0
15      Iridonia   30.0
16         Kalee   23.0
17      Kashyyyk   26.0
18     Malastare   29.0
19        Mirial   30.0
20      Mon Cala   22.0
21    Muunilinst   20.0
22         Naboo   30.0
23          Ojo

In [72]:
import pandas as pd


file_path = "troop_movements.csv"
chunk_size = 10000


empire_vs_resistance_counts = pd.Series(dtype=int)
homeworld_counts = pd.Series(dtype=int)
unit_type_counts = pd.Series(dtype=int)
data_chunks = []

try:
    for chunk in pd.read_csv(file_path, chunksize=chunk_size):
     
        data_chunks.append(chunk)
        
        # 1. Update empire vs resistance counts
        empire_vs_resistance_counts = empire_vs_resistance_counts.add(chunk['empire_or_resistance'].value_counts(), fill_value=0)

        # 2. Update homeworld counts
        homeworld_counts = homeworld_counts.add(chunk['homeworld'].value_counts(), fill_value=0)

        # 3. Update unit type counts
        unit_type_counts = unit_type_counts.add(chunk['unit_type'].value_counts(), fill_value=0)
        
        # 4. Engineer a new feature called is_resistance
        chunk['is_resistance'] = chunk['empire_or_resistance'] == 'resistance'

       

except Exception as e:
    print(f"An error occurred while reading the CSV file in chunks: {e}")
    data_chunks = []


if data_chunks:
    df = pd.concat(data_chunks)
    
    print("\nDataFrame created from chunks:")
    print(df.head())

 
    empire_vs_resistance = empire_vs_resistance_counts.reset_index()
    empire_vs_resistance.columns = ['Faction', 'Count']
    print("Counts of Empire vs Resistance:\n")
    print(empire_vs_resistance)
    print("\n")

    characters_by_homeworld = homeworld_counts.reset_index()
    characters_by_homeworld.columns = ['Homeworld', 'Count']
    print("Counts of Characters by Homeworld:\n")
    print(characters_by_homeworld)
    print("\n")

    characters_by_unit_type = unit_type_counts.reset_index()
    characters_by_unit_type.columns = ['Unit Type', 'Count']
    print("Counts of Characters by Unit Type:\n")
    print(characters_by_unit_type)
    print("\n")


    df['is_resistance'] = df['empire_or_resistance'] == 'resistance'
    print("Data with the new 'is_resistance' feature:\n")
    print(df[['timestamp', 'unit_id', 'unit_type', 'empire_or_resistance', 'location_x', 'location_y', 'destination_x', 'destination_y', 'homeworld', 'is_resistance']].head(10))  # Display the first 10 rows as an example
    print("\n")

else:
    print("The DataFrame could not be loaded. Please check the CSV file.")

df 


DataFrame created from chunks:
             timestamp  unit_id     unit_type empire_or_resistance  \
0  2024-07-10 14:16:04        1   tie_fighter               empire   
1  2024-07-10 14:16:03        2  stormtrooper           resistance   
2  2024-07-10 14:16:02        3   tie_fighter               empire   
3  2024-07-10 14:16:01        4         at-at           resistance   
4  2024-07-10 14:16:00        5  tie_silencer           resistance   

   location_x  location_y  destination_x  destination_y   homeworld  \
0           7           8              9             10        Ojom   
1          10           1              2              9     Iktotch   
2           2           1              8              3    Iridonia   
3           6           4              6              2  Haruun Kal   
4           6           1             10             10    Iridonia   

   is_resistance  
0          False  
1           True  
2          False  
3           True  
4           True  
Counts

Unnamed: 0,timestamp,unit_id,unit_type,empire_or_resistance,location_x,location_y,destination_x,destination_y,homeworld,is_resistance
0,2024-07-10 14:16:04,1,tie_fighter,empire,7,8,9,10,Ojom,False
1,2024-07-10 14:16:03,2,stormtrooper,resistance,10,1,2,9,Iktotch,True
2,2024-07-10 14:16:02,3,tie_fighter,empire,2,1,8,3,Iridonia,False
3,2024-07-10 14:16:01,4,at-at,resistance,6,4,6,2,Haruun Kal,True
4,2024-07-10 14:16:00,5,tie_silencer,resistance,6,1,10,10,Iridonia,True
...,...,...,...,...,...,...,...,...,...,...
995,2024-07-10 13:59:29,996,resistance_soldier,empire,10,2,7,6,Iridonia,False
996,2024-07-10 13:59:28,997,at-at,resistance,1,5,3,1,Rodia,True
997,2024-07-10 13:59:27,998,tie_fighter,empire,7,8,8,1,Toydaria,False
998,2024-07-10 13:59:26,999,stormtrooper,empire,5,8,6,1,Dagobah,False
