In [1]:
import pandas as pd
import math
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

READ AND PREPROCESS DATA

In [2]:
df =pd.read_csv("walloutput.csv")

In [3]:
df.shape

(3773, 4)

In [4]:
df.head()

Unnamed: 0,Bot_Cell,Crew_Cell,Closed_Cells,Walls
0,"(0, 6)","(2, 8)","[(4, 4), (4, 6), (6, 4), (6, 6)]","[(8, 0), (5, 10), (9, 9), (4, 7), (1, 9), (5, ..."
1,"(1, 7)","(2, 7)","[(4, 4), (4, 6), (6, 4), (6, 6)]","[(8, 0), (5, 10), (9, 9), (4, 7), (1, 9), (5, ..."
2,"(1, 7)","(3, 7)","[(4, 4), (4, 6), (6, 4), (6, 6)]","[(8, 0), (5, 10), (9, 9), (4, 7), (1, 9), (5, ..."
3,"(2, 7)","(3, 8)","[(4, 4), (4, 6), (6, 4), (6, 6)]","[(8, 0), (5, 10), (9, 9), (4, 7), (1, 9), (5, ..."
4,"(2, 8)","(3, 9)","[(4, 4), (4, 6), (6, 4), (6, 6)]","[(8, 0), (5, 10), (9, 9), (4, 7), (1, 9), (5, ..."


In [5]:
# Extract x, y, p, and q values from the existing columns
df['bot_x'] = df['Bot_Cell'].apply(lambda x: int(x.split(',')[0].strip('()')))
df['bot_y'] = df['Bot_Cell'].apply(lambda x: int(x.split(',')[1].strip('()')))
df['crew_x'] = df['Crew_Cell'].apply(lambda x: int(x.split(',')[0].strip('()')))
df['crew_y'] = df['Crew_Cell'].apply(lambda x: int(x.split(',')[1].strip('()')))

# Calculate the distance
df['Distance_from_bot_to_crew'] = abs(df['bot_x'] - df['crew_x']) + abs(df['bot_y'] - df['crew_y'])
df['Distance_from_bot_to_teleport'] = abs(df['bot_x'] - 5) + abs(df['bot_y'] - 5)
df['Distance_from_crew_to_teleport'] = abs(5 - df['crew_x']) + abs(5 - df['crew_y'])

#Drop the intermediate columns x, y, p, and q if needed
df.drop(['crew_x', 'crew_y', 'bot_x', 'bot_y'], axis=1, inplace=True)

In [6]:
df.head()

Unnamed: 0,Bot_Cell,Crew_Cell,Closed_Cells,Walls,Distance_from_bot_to_crew,Distance_from_bot_to_teleport,Distance_from_crew_to_teleport
0,"(0, 6)","(2, 8)","[(4, 4), (4, 6), (6, 4), (6, 6)]","[(8, 0), (5, 10), (9, 9), (4, 7), (1, 9), (5, ...",4,6,6
1,"(1, 7)","(2, 7)","[(4, 4), (4, 6), (6, 4), (6, 6)]","[(8, 0), (5, 10), (9, 9), (4, 7), (1, 9), (5, ...",1,6,5
2,"(1, 7)","(3, 7)","[(4, 4), (4, 6), (6, 4), (6, 6)]","[(8, 0), (5, 10), (9, 9), (4, 7), (1, 9), (5, ...",2,6,4
3,"(2, 7)","(3, 8)","[(4, 4), (4, 6), (6, 4), (6, 6)]","[(8, 0), (5, 10), (9, 9), (4, 7), (1, 9), (5, ...",2,5,5
4,"(2, 8)","(3, 9)","[(4, 4), (4, 6), (6, 4), (6, 6)]","[(8, 0), (5, 10), (9, 9), (4, 7), (1, 9), (5, ...",2,6,6


In [7]:
df['Bot_Move'] = df['Bot_Cell'].shift(-1)

In [8]:
def parse_tuple(cell):
    parts = cell.strip('()').split(',')
    return tuple(int(part.strip()) for part in parts)

def clean_Bot_Move(row, df):
    crew_cell = parse_tuple(row['Crew_Cell'])
    if crew_cell[0] == 5 and crew_cell[1] == 5:
        return None
    else:
        next_row_index = row.name + 1  # Get the index of the next row
        if next_row_index < len(df):
            return df.at[next_row_index, 'Bot_Cell']  # Return Bot_Cell value from the next row


df['Bot_Move'] = df.apply(lambda row: clean_Bot_Move(row, df), axis=1)


In [9]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3773 entries, 0 to 3772
Data columns (total 8 columns):
 #   Column                          Non-Null Count  Dtype 
---  ------                          --------------  ----- 
 0   Bot_Cell                        3773 non-null   object
 1   Crew_Cell                       3773 non-null   object
 2   Closed_Cells                    3773 non-null   object
 3   Walls                           3773 non-null   object
 4   Distance_from_bot_to_crew       3773 non-null   int64 
 5   Distance_from_bot_to_teleport   3773 non-null   int64 
 6   Distance_from_crew_to_teleport  3773 non-null   int64 
 7   Bot_Move                        3661 non-null   object
dtypes: int64(3), object(5)
memory usage: 235.9+ KB


In [10]:
df =df.dropna()

In [11]:
def parse_coordinates(coord_str):
    if coord_str:
        x, y = map(int, coord_str.strip("()").split(","))
        return x, y
    else:
        return None

In [12]:
def calculate_direction(row):
    bot_cell = parse_coordinates(row['Bot_Cell'])
    bot_move = parse_coordinates(row['Bot_Move'])

    if bot_cell and bot_move:
        delta_x = bot_move[0] - bot_cell[0]
        delta_y = bot_move[1] - bot_cell[1]

        if delta_x == 0 and delta_y == 0:
            return "No movement"
        elif delta_x == 0:
            return "North" if delta_y > 0 else "South"
        elif delta_y == 0:
            return "East" if delta_x > 0 else "West"
        elif delta_x > 0:
            return "Northeast" if delta_y > 0 else "Southeast"
        else:
            return "Northwest" if delta_y > 0 else "Southwest"
    else:
        return "Invalid coordinates"

# Apply the calculate_direction function to each row and store the result in a new column
df['Direction_of_Bot'] = df.apply(lambda row: calculate_direction(row), axis=1)


In [13]:
df.head()

Unnamed: 0,Bot_Cell,Crew_Cell,Closed_Cells,Walls,Distance_from_bot_to_crew,Distance_from_bot_to_teleport,Distance_from_crew_to_teleport,Bot_Move,Direction_of_Bot
0,"(0, 6)","(2, 8)","[(4, 4), (4, 6), (6, 4), (6, 6)]","[(8, 0), (5, 10), (9, 9), (4, 7), (1, 9), (5, ...",4,6,6,"(1, 7)",Northeast
1,"(1, 7)","(2, 7)","[(4, 4), (4, 6), (6, 4), (6, 6)]","[(8, 0), (5, 10), (9, 9), (4, 7), (1, 9), (5, ...",1,6,5,"(1, 7)",No movement
2,"(1, 7)","(3, 7)","[(4, 4), (4, 6), (6, 4), (6, 6)]","[(8, 0), (5, 10), (9, 9), (4, 7), (1, 9), (5, ...",2,6,4,"(2, 7)",East
3,"(2, 7)","(3, 8)","[(4, 4), (4, 6), (6, 4), (6, 6)]","[(8, 0), (5, 10), (9, 9), (4, 7), (1, 9), (5, ...",2,5,5,"(2, 8)",North
4,"(2, 8)","(3, 9)","[(4, 4), (4, 6), (6, 4), (6, 6)]","[(8, 0), (5, 10), (9, 9), (4, 7), (1, 9), (5, ...",2,6,6,"(2, 9)",North


In [14]:
def map_coordinates_to_integer(row,celltype):
    cell = parse_coordinates(row[celltype])
    cols = 11
    return cell[0] * cols + cell[1] + 1

df["Bot_Cell_Encoded"] = df.apply(lambda row: map_coordinates_to_integer(row,"Bot_Cell"), axis=1)
df["Crew_Cell_Encoded"] = df.apply(lambda row: map_coordinates_to_integer(row,"Crew_Cell"), axis=1)
df["Bot_Move_Encoded"] = df.apply(lambda row: map_coordinates_to_integer(row,"Bot_Move"), axis=1)

In [15]:
df.head()

Unnamed: 0,Bot_Cell,Crew_Cell,Closed_Cells,Walls,Distance_from_bot_to_crew,Distance_from_bot_to_teleport,Distance_from_crew_to_teleport,Bot_Move,Direction_of_Bot,Bot_Cell_Encoded,Crew_Cell_Encoded,Bot_Move_Encoded
0,"(0, 6)","(2, 8)","[(4, 4), (4, 6), (6, 4), (6, 6)]","[(8, 0), (5, 10), (9, 9), (4, 7), (1, 9), (5, ...",4,6,6,"(1, 7)",Northeast,7,31,19
1,"(1, 7)","(2, 7)","[(4, 4), (4, 6), (6, 4), (6, 6)]","[(8, 0), (5, 10), (9, 9), (4, 7), (1, 9), (5, ...",1,6,5,"(1, 7)",No movement,19,30,19
2,"(1, 7)","(3, 7)","[(4, 4), (4, 6), (6, 4), (6, 6)]","[(8, 0), (5, 10), (9, 9), (4, 7), (1, 9), (5, ...",2,6,4,"(2, 7)",East,19,41,30
3,"(2, 7)","(3, 8)","[(4, 4), (4, 6), (6, 4), (6, 6)]","[(8, 0), (5, 10), (9, 9), (4, 7), (1, 9), (5, ...",2,5,5,"(2, 8)",North,30,42,31
4,"(2, 8)","(3, 9)","[(4, 4), (4, 6), (6, 4), (6, 6)]","[(8, 0), (5, 10), (9, 9), (4, 7), (1, 9), (5, ...",2,6,6,"(2, 9)",North,31,43,32


In [16]:
def parse_wall_coordinates(cell):
    # Remove leading and trailing brackets and split by comma
    cells = cell.strip('[]').split(',')
    coordinates = []
    for cell in cells:
        # Extract coordinates from each cell, remove parentheses, and split by comma
        parts = cell.strip('()').split(',')
        # Convert parts to integers and create tuple
        coordinate = tuple(int(part.strip()) for part in parts if part.strip().isdigit())
        coordinates.append(coordinate)
    return coordinates

def encode_closed_cells(row):
    # Convert string representation of Closed_cells to list of cell tuples
    cells = parse_wall_coordinates(row['Closed_Cells'])
    # Assign a unique identifier to each unique cell
    unique_cells = set(cells)
    cell_mapping = {cell: i for i, cell in enumerate(unique_cells)}
    # Concatenate the identifiers of the cells to form a single encoded value
    encoded_value = ''.join(str(cell_mapping[cell]) for cell in cells)
    return encoded_value

# Apply the encoding function to each row in the DataFrame
df['Wall_Encoded_value'] = df.apply(encode_closed_cells, axis=1)


In [17]:
df.head()

Unnamed: 0,Bot_Cell,Crew_Cell,Closed_Cells,Walls,Distance_from_bot_to_crew,Distance_from_bot_to_teleport,Distance_from_crew_to_teleport,Bot_Move,Direction_of_Bot,Bot_Cell_Encoded,Crew_Cell_Encoded,Bot_Move_Encoded,Wall_Encoded_value
0,"(0, 6)","(2, 8)","[(4, 4), (4, 6), (6, 4), (6, 6)]","[(8, 0), (5, 10), (9, 9), (4, 7), (1, 9), (5, ...",4,6,6,"(1, 7)",Northeast,7,31,19,22101210
1,"(1, 7)","(2, 7)","[(4, 4), (4, 6), (6, 4), (6, 6)]","[(8, 0), (5, 10), (9, 9), (4, 7), (1, 9), (5, ...",1,6,5,"(1, 7)",No movement,19,30,19,22101210
2,"(1, 7)","(3, 7)","[(4, 4), (4, 6), (6, 4), (6, 6)]","[(8, 0), (5, 10), (9, 9), (4, 7), (1, 9), (5, ...",2,6,4,"(2, 7)",East,19,41,30,22101210
3,"(2, 7)","(3, 8)","[(4, 4), (4, 6), (6, 4), (6, 6)]","[(8, 0), (5, 10), (9, 9), (4, 7), (1, 9), (5, ...",2,5,5,"(2, 8)",North,30,42,31,22101210
4,"(2, 8)","(3, 9)","[(4, 4), (4, 6), (6, 4), (6, 6)]","[(8, 0), (5, 10), (9, 9), (4, 7), (1, 9), (5, ...",2,6,6,"(2, 9)",North,31,43,32,22101210


In [18]:
def lengthSquare(X, Y):
    xDiff = X[0] - Y[0]
    yDiff = X[1] - Y[1]
    return xDiff * xDiff + yDiff * yDiff

def getAngle(a, b):
    c = (5, 5)
    a2 = lengthSquare(a, c)
    b2 = lengthSquare(b, c)
    c2 = lengthSquare(a, b)
    return math.acos((a2 + b2 - c2) / (2 * math.sqrt(a2) * math.sqrt(b2)))#(math.acos((a2 + b2 - c2) / (2 * math.sqrt(a2) * math.sqrt(b2))) * 180 / math.pi);

def parse_angles(row):
    crew_cell = parse_tuple(row['Crew_Cell'])
    bot_cell = parse_tuple(row['Bot_Cell'])
    return getAngle(bot_cell, crew_cell)

# df['Angle_of_Bot'] = df.apply(parse_angles, axis=1)

In [19]:
label_encoder = LabelEncoder()
label_encoded_df = df.copy()
if label_encoded_df["Direction_of_Bot"].dtype == 'object':
    label_encoded_df["Direction_of_Bot"] = label_encoder.fit_transform(label_encoded_df["Direction_of_Bot"])

# if label_encoded_df["Angle_of_Bot"].dtype == 'object':
#   label_encoded_df["Angle_of_Bot"] = label_encoder.fit_transform(label_encoded_df["Angle_of_Bot"])

# if label_encoded_df["Wall_Encoded_value"].dtype == 'object':
#   label_encoded_df["Wall_Encoded_value"] = label_encoder.fit_transform(label_encoded_df["Wall_Encoded_value"])

In [20]:
label_encoded_df.head(70)

Unnamed: 0,Bot_Cell,Crew_Cell,Closed_Cells,Walls,Distance_from_bot_to_crew,Distance_from_bot_to_teleport,Distance_from_crew_to_teleport,Bot_Move,Direction_of_Bot,Bot_Cell_Encoded,Crew_Cell_Encoded,Bot_Move_Encoded,Wall_Encoded_value
0,"(0, 6)","(2, 8)","[(4, 4), (4, 6), (6, 4), (6, 6)]","[(8, 0), (5, 10), (9, 9), (4, 7), (1, 9), (5, ...",4,6,6,"(1, 7)",3,7,31,19,22101210
1,"(1, 7)","(2, 7)","[(4, 4), (4, 6), (6, 4), (6, 6)]","[(8, 0), (5, 10), (9, 9), (4, 7), (1, 9), (5, ...",1,6,5,"(1, 7)",1,19,30,19,22101210
2,"(1, 7)","(3, 7)","[(4, 4), (4, 6), (6, 4), (6, 6)]","[(8, 0), (5, 10), (9, 9), (4, 7), (1, 9), (5, ...",2,6,4,"(2, 7)",0,19,41,30,22101210
3,"(2, 7)","(3, 8)","[(4, 4), (4, 6), (6, 4), (6, 6)]","[(8, 0), (5, 10), (9, 9), (4, 7), (1, 9), (5, ...",2,5,5,"(2, 8)",2,30,42,31,22101210
4,"(2, 8)","(3, 9)","[(4, 4), (4, 6), (6, 4), (6, 6)]","[(8, 0), (5, 10), (9, 9), (4, 7), (1, 9), (5, ...",2,6,6,"(2, 9)",2,31,43,32,22101210
...,...,...,...,...,...,...,...,...,...,...,...,...,...
68,"(1, 3)","(3, 3)","[(4, 4), (4, 6), (6, 4), (6, 6)]","[(1, 5), (5, 10), (10, 7), (8, 5), (8, 4), (1,...",2,6,4,"(2, 3)",0,15,37,26,22101210
69,"(2, 3)","(3, 2)","[(4, 4), (4, 6), (6, 4), (6, 6)]","[(1, 5), (5, 10), (10, 7), (8, 5), (8, 4), (1,...",2,5,5,"(2, 2)",5,26,36,25,22101210
70,"(2, 2)","(3, 1)","[(4, 4), (4, 6), (6, 4), (6, 6)]","[(1, 5), (5, 10), (10, 7), (8, 5), (8, 4), (1,...",2,6,6,"(2, 1)",5,25,35,24,22101210
71,"(2, 1)","(3, 0)","[(4, 4), (4, 6), (6, 4), (6, 6)]","[(1, 5), (5, 10), (10, 7), (8, 5), (8, 4), (1,...",2,7,7,"(2, 0)",5,24,34,23,22101210


In [21]:
label_encoded_df = label_encoded_df.drop('Bot_Cell',axis =1)
label_encoded_df = label_encoded_df.drop('Crew_Cell',axis =1)
label_encoded_df = label_encoded_df.drop('Bot_Move',axis =1)
label_encoded_df = label_encoded_df.drop('Closed_Cells',axis =1)
label_encoded_df = label_encoded_df.drop('Walls',axis =1)

In [22]:
label_encoded_df.head()

Unnamed: 0,Distance_from_bot_to_crew,Distance_from_bot_to_teleport,Distance_from_crew_to_teleport,Direction_of_Bot,Bot_Cell_Encoded,Crew_Cell_Encoded,Bot_Move_Encoded,Wall_Encoded_value
0,4,6,6,3,7,31,19,22101210
1,1,6,5,1,19,30,19,22101210
2,2,6,4,0,19,41,30,22101210
3,2,5,5,2,30,42,31,22101210
4,2,6,6,2,31,43,32,22101210


In [23]:
correlation_matrix2 = label_encoded_df.corr()

In [24]:
correlation_matrix2

Unnamed: 0,Distance_from_bot_to_crew,Distance_from_bot_to_teleport,Distance_from_crew_to_teleport,Direction_of_Bot,Bot_Cell_Encoded,Crew_Cell_Encoded,Bot_Move_Encoded,Wall_Encoded_value
Distance_from_bot_to_crew,1.0,-0.095291,0.322942,0.106871,0.010053,0.021964,0.004353,
Distance_from_bot_to_teleport,-0.095291,1.0,0.46911,-0.021628,0.016529,-0.009996,0.012319,
Distance_from_crew_to_teleport,0.322942,0.46911,1.0,0.048043,0.021695,0.045687,0.026401,
Direction_of_Bot,0.106871,-0.021628,0.048043,1.0,0.204114,-0.094744,0.006324,
Bot_Cell_Encoded,0.010053,0.016529,0.021695,0.204114,1.0,0.814162,0.965831,
Crew_Cell_Encoded,0.021964,-0.009996,0.045687,-0.094744,0.814162,1.0,0.890875,
Bot_Move_Encoded,0.004353,0.012319,0.026401,0.006324,0.965831,0.890875,1.0,
Wall_Encoded_value,,,,,,,,


In [25]:
label_encoded_df = label_encoded_df.drop("Distance_from_bot_to_teleport",axis=1)
label_encoded_df = label_encoded_df.drop("Distance_from_crew_to_teleport",axis=1)
# label_encoded_df = label_encoded_df.drop("Distance_from_bot_to_crew",axis=1)
# label_encoded_df = label_encoded_df.drop("Direction_of_Bot",axis=1)
# label_encoded_df = label_encoded_df.drop("Wall_Encoded_value",axis=1)



TRAIN TEST SPLIT

In [26]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

In [27]:
final_data = label_encoded_df.copy()
final_data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3661 entries, 0 to 3771
Data columns (total 6 columns):
 #   Column                     Non-Null Count  Dtype 
---  ------                     --------------  ----- 
 0   Distance_from_bot_to_crew  3661 non-null   int64 
 1   Direction_of_Bot           3661 non-null   int32 
 2   Bot_Cell_Encoded           3661 non-null   int64 
 3   Crew_Cell_Encoded          3661 non-null   int64 
 4   Bot_Move_Encoded           3661 non-null   int64 
 5   Wall_Encoded_value         3661 non-null   object
dtypes: int32(1), int64(4), object(1)
memory usage: 185.9+ KB


In [28]:
final_data = final_data.dropna()

In [29]:
final_data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3661 entries, 0 to 3771
Data columns (total 6 columns):
 #   Column                     Non-Null Count  Dtype 
---  ------                     --------------  ----- 
 0   Distance_from_bot_to_crew  3661 non-null   int64 
 1   Direction_of_Bot           3661 non-null   int32 
 2   Bot_Cell_Encoded           3661 non-null   int64 
 3   Crew_Cell_Encoded          3661 non-null   int64 
 4   Bot_Move_Encoded           3661 non-null   int64 
 5   Wall_Encoded_value         3661 non-null   object
dtypes: int32(1), int64(4), object(1)
memory usage: 185.9+ KB


In [30]:
X = final_data.drop('Bot_Move_Encoded', axis=1)
y = final_data['Bot_Move_Encoded']

In [31]:
X.head()

Unnamed: 0,Distance_from_bot_to_crew,Direction_of_Bot,Bot_Cell_Encoded,Crew_Cell_Encoded,Wall_Encoded_value
0,4,3,7,31,22101210
1,1,1,19,30,22101210
2,2,0,19,41,22101210
3,2,2,30,42,22101210
4,2,2,31,43,22101210


In [32]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [33]:
X_train

Unnamed: 0,Distance_from_bot_to_crew,Direction_of_Bot,Bot_Cell_Encoded,Crew_Cell_Encoded,Wall_Encoded_value
589,2,5,10,20,22101210
3487,2,5,91,79,22101210
356,3,8,50,27,22101210
3507,2,0,41,63,22101210
1653,3,0,57,78,22101210
...,...,...,...,...,...
1171,1,1,43,42,22101210
1340,2,2,31,43,22101210
890,2,2,93,83,22101210
3611,2,7,18,16,22101210


MODELS

LINEAR REGRESSION

In [34]:
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

Mean Squared Error: 18.407944422339053


DECISION TREE REGRESSOR

In [35]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score

model = DecisionTreeRegressor()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

Mean Squared Error: 7.428376534788541
