In [1]:
import pandas as pd
import math
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score


READ AND PREPROCESS DATA

In [2]:
df =pd.read_csv("output.csv")

In [3]:
df.shape

(2801, 3)

In [4]:
df.head()

Unnamed: 0,Bot_Cell,Crew_Cell,Closed_Cells
0,"(3, 5)","(7, 10)","[(1, 9), (4, 0), (1, 10), (10, 3), (8, 0), (0,..."
1,"(3, 6)","(8, 10)","[(1, 9), (4, 0), (1, 10), (10, 3), (8, 0), (0,..."
2,"(4, 7)","(8, 9)","[(1, 9), (4, 0), (1, 10), (10, 3), (8, 0), (0,..."
3,"(5, 8)","(9, 9)","[(1, 9), (4, 0), (1, 10), (10, 3), (8, 0), (0,..."
4,"(6, 9)","(8, 9)","[(1, 9), (4, 0), (1, 10), (10, 3), (8, 0), (0,..."


In [5]:
# Extract x, y, p, and q values from the existing columns
df['bot_x'] = df['Bot_Cell'].apply(lambda x: int(x.split(',')[0].strip('()')))
df['bot_y'] = df['Bot_Cell'].apply(lambda x: int(x.split(',')[1].strip('()')))
df['crew_x'] = df['Crew_Cell'].apply(lambda x: int(x.split(',')[0].strip('()')))
df['crew_y'] = df['Crew_Cell'].apply(lambda x: int(x.split(',')[1].strip('()')))

# Calculate the distance
df['Distance_from_bot_to_crew'] = abs(df['bot_x'] - df['crew_x']) + abs(df['bot_y'] - df['crew_y'])
df['Distance_from_bot_to_teleport'] = abs(df['bot_x'] - 5) + abs(df['bot_y'] - 5)
df['Distance_from_crew_to_teleport'] = abs(5 - df['crew_x']) + abs(5 - df['crew_y'])
#Drop the intermediate columns x, y, p, and q if needed
df.drop(['crew_x', 'crew_y', 'bot_x', 'bot_y'], axis=1, inplace=True)



In [6]:
df.head()

Unnamed: 0,Bot_Cell,Crew_Cell,Closed_Cells,Distance_from_bot_to_crew,Distance_from_bot_to_teleport,Distance_from_crew_to_teleport
0,"(3, 5)","(7, 10)","[(1, 9), (4, 0), (1, 10), (10, 3), (8, 0), (0,...",9,2,7
1,"(3, 6)","(8, 10)","[(1, 9), (4, 0), (1, 10), (10, 3), (8, 0), (0,...",9,3,8
2,"(4, 7)","(8, 9)","[(1, 9), (4, 0), (1, 10), (10, 3), (8, 0), (0,...",6,3,7
3,"(5, 8)","(9, 9)","[(1, 9), (4, 0), (1, 10), (10, 3), (8, 0), (0,...",5,3,8
4,"(6, 9)","(8, 9)","[(1, 9), (4, 0), (1, 10), (10, 3), (8, 0), (0,...",2,5,7


In [7]:
df['Bot_Move'] = df['Bot_Cell'].shift(-1)

In [8]:
def parse_tuple(cell):
    parts = cell.strip('()').split(',')
    return tuple(int(part.strip()) for part in parts)

def clean_Bot_Move(row, df):
    crew_cell = parse_tuple(row['Crew_Cell'])
    if crew_cell[0] == 5 and crew_cell[1] == 5:
        return None
    else:
        next_row_index = row.name + 1  # Get the index of the next row
        if next_row_index < len(df):
            return df.at[next_row_index, 'Bot_Cell']  # Return Bot_Cell value from the next row


df['Bot_Move'] = df.apply(lambda row: clean_Bot_Move(row, df), axis=1)


In [9]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2801 entries, 0 to 2800
Data columns (total 7 columns):
 #   Column                          Non-Null Count  Dtype 
---  ------                          --------------  ----- 
 0   Bot_Cell                        2801 non-null   object
 1   Crew_Cell                       2801 non-null   object
 2   Closed_Cells                    2801 non-null   object
 3   Distance_from_bot_to_crew       2801 non-null   int64 
 4   Distance_from_bot_to_teleport   2801 non-null   int64 
 5   Distance_from_crew_to_teleport  2801 non-null   int64 
 6   Bot_Move                        2701 non-null   object
dtypes: int64(3), object(4)
memory usage: 153.3+ KB


In [10]:
df =df.dropna()

In [11]:
def parse_coordinates(coord_str):
    if coord_str:
        x, y = map(int, coord_str.strip("()").split(","))
        return x, y
    else:
        return None

In [12]:

def calculate_direction(row):
    bot_cell = parse_coordinates(row['Bot_Cell'])
    bot_move = parse_coordinates(row['Bot_Move'])

    if bot_cell and bot_move:
        delta_x = bot_move[0] - bot_cell[0]
        delta_y = bot_move[1] - bot_cell[1]

        if delta_x == 0 and delta_y == 0:
            return "No movement"
        elif delta_x == 0:
            return "North" if delta_y > 0 else "South"
        elif delta_y == 0:
            return "East" if delta_x > 0 else "West"
        elif delta_x > 0:
            return "Northeast" if delta_y > 0 else "Southeast"
        else:
            return "Northwest" if delta_y > 0 else "Southwest"
    else:
        return "Invalid coordinates"

# Apply the calculate_direction function to each row and store the result in a new column
df['Direction_of_Bot'] = df.apply(lambda row: calculate_direction(row), axis=1)
    

In [13]:
df.head()

Unnamed: 0,Bot_Cell,Crew_Cell,Closed_Cells,Distance_from_bot_to_crew,Distance_from_bot_to_teleport,Distance_from_crew_to_teleport,Bot_Move,Direction_of_Bot
0,"(3, 5)","(7, 10)","[(1, 9), (4, 0), (1, 10), (10, 3), (8, 0), (0,...",9,2,7,"(3, 6)",North
1,"(3, 6)","(8, 10)","[(1, 9), (4, 0), (1, 10), (10, 3), (8, 0), (0,...",9,3,8,"(4, 7)",Northeast
2,"(4, 7)","(8, 9)","[(1, 9), (4, 0), (1, 10), (10, 3), (8, 0), (0,...",6,3,7,"(5, 8)",Northeast
3,"(5, 8)","(9, 9)","[(1, 9), (4, 0), (1, 10), (10, 3), (8, 0), (0,...",5,3,8,"(6, 9)",Northeast
4,"(6, 9)","(8, 9)","[(1, 9), (4, 0), (1, 10), (10, 3), (8, 0), (0,...",2,5,7,"(7, 10)",Northeast


In [14]:
def map_coordinates_to_integer(row,celltype):
    cell = parse_coordinates(row[celltype])
    cols = 11
    return cell[0] * cols + cell[1] + 1

df["Bot_Cell_Encoded"] = df.apply(lambda row: map_coordinates_to_integer(row,"Bot_Cell"), axis=1)
df["Crew_Cell_Encoded"] = df.apply(lambda row: map_coordinates_to_integer(row,"Crew_Cell"), axis=1)
df["Bot_Move_Encoded"] = df.apply(lambda row: map_coordinates_to_integer(row,"Bot_Move"), axis=1)


In [15]:
df.head()

Unnamed: 0,Bot_Cell,Crew_Cell,Closed_Cells,Distance_from_bot_to_crew,Distance_from_bot_to_teleport,Distance_from_crew_to_teleport,Bot_Move,Direction_of_Bot,Bot_Cell_Encoded,Crew_Cell_Encoded,Bot_Move_Encoded
0,"(3, 5)","(7, 10)","[(1, 9), (4, 0), (1, 10), (10, 3), (8, 0), (0,...",9,2,7,"(3, 6)",North,39,88,40
1,"(3, 6)","(8, 10)","[(1, 9), (4, 0), (1, 10), (10, 3), (8, 0), (0,...",9,3,8,"(4, 7)",Northeast,40,99,52
2,"(4, 7)","(8, 9)","[(1, 9), (4, 0), (1, 10), (10, 3), (8, 0), (0,...",6,3,7,"(5, 8)",Northeast,52,98,64
3,"(5, 8)","(9, 9)","[(1, 9), (4, 0), (1, 10), (10, 3), (8, 0), (0,...",5,3,8,"(6, 9)",Northeast,64,109,76
4,"(6, 9)","(8, 9)","[(1, 9), (4, 0), (1, 10), (10, 3), (8, 0), (0,...",2,5,7,"(7, 10)",Northeast,76,98,88


In [16]:
def parse_wall_coordinates(cell):
    # Remove leading and trailing brackets and split by comma
    cells = cell.strip('[]').split(',')
    coordinates = []
    for cell in cells:
        # Extract coordinates from each cell, remove parentheses, and split by comma
        parts = cell.strip('()').split(',')
        # Convert parts to integers and create tuple
        coordinate = tuple(int(part.strip()) for part in parts if part.strip().isdigit())
        coordinates.append(coordinate)
    return coordinates

def encode_closed_cells(row):
    # Convert string representation of Closed_cells to list of cell tuples
    cells = parse_wall_coordinates(row['Closed_Cells'])
    # Assign a unique identifier to each unique cell
    unique_cells = set(cells)
    cell_mapping = {cell: i for i, cell in enumerate(unique_cells)}
    # Concatenate the identifiers of the cells to form a single encoded value
    encoded_value = ''.join(str(cell_mapping[cell]) for cell in cells)
    return encoded_value

# Apply the encoding function to each row in the DataFrame
df['Wall_Encoded_value'] = df.apply(encode_closed_cells, axis=1)


In [17]:
df.head()

Unnamed: 0,Bot_Cell,Crew_Cell,Closed_Cells,Distance_from_bot_to_crew,Distance_from_bot_to_teleport,Distance_from_crew_to_teleport,Bot_Move,Direction_of_Bot,Bot_Cell_Encoded,Crew_Cell_Encoded,Bot_Move_Encoded,Wall_Encoded_value
0,"(3, 5)","(7, 10)","[(1, 9), (4, 0), (1, 10), (10, 3), (8, 0), (0,...",9,2,7,"(3, 6)",North,39,88,40,38656467656068626261
1,"(3, 6)","(8, 10)","[(1, 9), (4, 0), (1, 10), (10, 3), (8, 0), (0,...",9,3,8,"(4, 7)",Northeast,40,99,52,38656467656068626261
2,"(4, 7)","(8, 9)","[(1, 9), (4, 0), (1, 10), (10, 3), (8, 0), (0,...",6,3,7,"(5, 8)",Northeast,52,98,64,38656467656068626261
3,"(5, 8)","(9, 9)","[(1, 9), (4, 0), (1, 10), (10, 3), (8, 0), (0,...",5,3,8,"(6, 9)",Northeast,64,109,76,38656467656068626261
4,"(6, 9)","(8, 9)","[(1, 9), (4, 0), (1, 10), (10, 3), (8, 0), (0,...",2,5,7,"(7, 10)",Northeast,76,98,88,38656467656068626261


In [18]:
label_encoder = LabelEncoder()
label_encoded_df = df.copy()
if label_encoded_df["Direction_of_Bot"].dtype == 'object':
    label_encoded_df["Direction_of_Bot"] = label_encoder.fit_transform(label_encoded_df["Direction_of_Bot"])

In [19]:
label_encoded_df.head()

Unnamed: 0,Bot_Cell,Crew_Cell,Closed_Cells,Distance_from_bot_to_crew,Distance_from_bot_to_teleport,Distance_from_crew_to_teleport,Bot_Move,Direction_of_Bot,Bot_Cell_Encoded,Crew_Cell_Encoded,Bot_Move_Encoded,Wall_Encoded_value
0,"(3, 5)","(7, 10)","[(1, 9), (4, 0), (1, 10), (10, 3), (8, 0), (0,...",9,2,7,"(3, 6)",2,39,88,40,38656467656068626261
1,"(3, 6)","(8, 10)","[(1, 9), (4, 0), (1, 10), (10, 3), (8, 0), (0,...",9,3,8,"(4, 7)",3,40,99,52,38656467656068626261
2,"(4, 7)","(8, 9)","[(1, 9), (4, 0), (1, 10), (10, 3), (8, 0), (0,...",6,3,7,"(5, 8)",3,52,98,64,38656467656068626261
3,"(5, 8)","(9, 9)","[(1, 9), (4, 0), (1, 10), (10, 3), (8, 0), (0,...",5,3,8,"(6, 9)",3,64,109,76,38656467656068626261
4,"(6, 9)","(8, 9)","[(1, 9), (4, 0), (1, 10), (10, 3), (8, 0), (0,...",2,5,7,"(7, 10)",3,76,98,88,38656467656068626261


In [20]:
label_encoded_df = label_encoded_df.drop('Bot_Cell',axis =1)
label_encoded_df = label_encoded_df.drop('Crew_Cell',axis =1)
label_encoded_df = label_encoded_df.drop('Bot_Move',axis =1)
label_encoded_df = label_encoded_df.drop('Closed_Cells',axis =1)
label_encoded_df = label_encoded_df.drop('Wall_Encoded_value',axis =1)

In [21]:
label_encoded_df.head()

Unnamed: 0,Distance_from_bot_to_crew,Distance_from_bot_to_teleport,Distance_from_crew_to_teleport,Direction_of_Bot,Bot_Cell_Encoded,Crew_Cell_Encoded,Bot_Move_Encoded
0,9,2,7,2,39,88,40
1,9,3,8,3,40,99,52
2,6,3,7,3,52,98,64
3,5,3,8,3,64,109,76
4,2,5,7,3,76,98,88


In [22]:
correlation_matrix2 = label_encoded_df.corr()

In [23]:
correlation_matrix2

Unnamed: 0,Distance_from_bot_to_crew,Distance_from_bot_to_teleport,Distance_from_crew_to_teleport,Direction_of_Bot,Bot_Cell_Encoded,Crew_Cell_Encoded,Bot_Move_Encoded
Distance_from_bot_to_crew,1.0,-0.122854,0.346885,0.110827,0.018176,0.103689,0.023691
Distance_from_bot_to_teleport,-0.122854,1.0,0.472556,-0.027424,-0.015072,-0.060039,-0.022528
Distance_from_crew_to_teleport,0.346885,0.472556,1.0,0.048831,0.015522,0.058995,0.022163
Direction_of_Bot,0.110827,-0.027424,0.048831,1.0,0.239322,-0.0774,0.035553
Bot_Cell_Encoded,0.018176,-0.015072,0.015522,0.239322,1.0,0.791944,0.965162
Crew_Cell_Encoded,0.103689,-0.060039,0.058995,-0.0774,0.791944,1.0,0.875963
Bot_Move_Encoded,0.023691,-0.022528,0.022163,0.035553,0.965162,0.875963,1.0


In [24]:
label_encoded_df = label_encoded_df.drop("Distance_from_bot_to_teleport",axis=1)


TRAIN TEST SPLIT

In [25]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

In [26]:
final_data = label_encoded_df.copy()
final_data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2701 entries, 0 to 2799
Data columns (total 6 columns):
 #   Column                          Non-Null Count  Dtype
---  ------                          --------------  -----
 0   Distance_from_bot_to_crew       2701 non-null   int64
 1   Distance_from_crew_to_teleport  2701 non-null   int64
 2   Direction_of_Bot                2701 non-null   int32
 3   Bot_Cell_Encoded                2701 non-null   int64
 4   Crew_Cell_Encoded               2701 non-null   int64
 5   Bot_Move_Encoded                2701 non-null   int64
dtypes: int32(1), int64(5)
memory usage: 137.2 KB


In [27]:
final_data = final_data.dropna()

In [28]:
final_data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2701 entries, 0 to 2799
Data columns (total 6 columns):
 #   Column                          Non-Null Count  Dtype
---  ------                          --------------  -----
 0   Distance_from_bot_to_crew       2701 non-null   int64
 1   Distance_from_crew_to_teleport  2701 non-null   int64
 2   Direction_of_Bot                2701 non-null   int32
 3   Bot_Cell_Encoded                2701 non-null   int64
 4   Crew_Cell_Encoded               2701 non-null   int64
 5   Bot_Move_Encoded                2701 non-null   int64
dtypes: int32(1), int64(5)
memory usage: 137.2 KB


In [29]:
X = final_data.drop('Bot_Move_Encoded', axis=1)
y = final_data['Bot_Move_Encoded']

In [30]:
X.head()

Unnamed: 0,Distance_from_bot_to_crew,Distance_from_crew_to_teleport,Direction_of_Bot,Bot_Cell_Encoded,Crew_Cell_Encoded
0,9,7,2,39,88
1,9,8,3,40,99
2,6,7,3,52,98
3,5,8,3,64,109
4,2,7,3,76,98


In [31]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [32]:
X_train

Unnamed: 0,Distance_from_bot_to_crew,Distance_from_crew_to_teleport,Direction_of_Bot,Bot_Cell_Encoded,Crew_Cell_Encoded
2379,2,1,8,70,60
2351,12,8,3,27,99
2560,2,6,8,31,19
1049,2,3,2,80,82
1528,2,6,2,19,31
...,...,...,...,...,...
1695,3,5,4,13,16
1135,2,2,2,81,83
1170,2,5,5,20,30
1339,2,4,5,27,37


MODELS

LINEAR REGRESSION

In [33]:
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

Mean Squared Error: 18.566047185394336


DECISION TREE REGRESSOR

In [35]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score

model = DecisionTreeRegressor()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)



Mean Squared Error: 11.619223659889094
