**Complete EDA using D3.js visualizations on the Restaurant's cuisine ratings dataset**

**Understanding the domain and data selection**

In [None]:
# Load the dataset
import io

import pandas as pd
from google.colab import files
from IPython.display import HTML, display  # needed by every display(HTML(...)) cell below

DATASET_PATH = 'Cuisine_rating.csv'

# Opens Colab's upload widget; the returned dict holds the uploaded files' contents as bytes.
uploaded = files.upload()

if uploaded:
    # Parse the bytes that were just uploaded. Colab renames the on-disk copy when the
    # name is already taken ("Cuisine_rating (1).csv"), so reading DATASET_PATH from
    # disk could silently load an older upload instead of this one.
    data = pd.read_csv(io.BytesIO(next(iter(uploaded.values()))))
else:
    # Nothing was uploaded: fall back to a copy already present in the runtime.
    data = pd.read_csv(DATASET_PATH)

Saving Cuisine_rating.csv to Cuisine_rating (1).csv


In [None]:
# Preview the first five records to confirm the file parsed as expected
data.head(n=5)

Unnamed: 0,User ID,Area code,Location,Gender,YOB,Marital Status,Activity,Budget,Cuisines,Alcohol,Smoker,Food Rating,Service Rating,Overall Rating,Often A S
0,1,153,"Upper East Side,NY",Female,2006,Single,Professional,3,Japanese,Never,Never,5,4,4.5,No
1,2,123,"St. George,NY",Female,1991,Married,Student,3,Indian,Never,Socially,1,1,1.0,No
2,3,122,"Upper West Side,NY",Male,1977,Single,Student,5,Seafood,Often,Often,5,5,5.0,Yes
3,4,153,"Upper East Side,NY",Female,1956,Married,Professional,5,Japanese,Never,Socially,3,1,2.0,No
4,5,129,"Central Park,NY",Male,1997,Single,Student,4,Filipino,Socially,Never,2,4,3.0,No


**Data Cleaning**

In [None]:
# Number of missing cells in every column
missing_values = data.isna().sum()

# Number of rows that exactly repeat an earlier row
duplicate_entries = data.duplicated(keep='first').sum()

# Show both results together as a tuple
(missing_values, duplicate_entries)

(User ID           0
 Area code         0
 Location          0
 Gender            0
 YOB               0
 Marital Status    0
 Activity          0
 Budget            0
 Cuisines          0
 Alcohol           0
 Smoker            0
 Food Rating       0
 Service Rating    0
 Overall Rating    0
 Often A S         0
 dtype: int64,
 0)

In [None]:
# Frequency table of the 'Cuisines' column, most common first
cuisine_counts = (
    data['Cuisines']
    .value_counts()
    .rename_axis('Cuisines')
    .reset_index(name='Count')
)

# One {'Cuisines': ..., 'Count': ...} dict per row, ready to hand to the D3 chart
cuisine_counts_list = cuisine_counts.to_dict(orient='records')
cuisine_counts_list


[{'Cuisines': 'Japanese', 'Count': 36},
 {'Cuisines': 'Filipino', 'Count': 34},
 {'Cuisines': 'French', 'Count': 34},
 {'Cuisines': 'Indian', 'Count': 32},
 {'Cuisines': 'Chinese', 'Count': 24},
 {'Cuisines': 'Seafood', 'Count': 22},
 {'Cuisines': 'Italian', 'Count': 18}]

**Cuisines Distribution Visualization**

In [None]:
# Horizontal D3 (v5) bar chart: one bar per cuisine, length and shade both encode the count.
# The records are embedded as JSON (DataFrame.to_json) instead of str(list): a Python repr
# is only accidentally valid JavaScript and breaks as soon as a value is None, NaN or a bool.
enhanced_cuisine_distribution_code = """
<!DOCTYPE html>
<script src="https://d3js.org/d3.v5.min.js"></script>
<div id="d3-container"></div>

<script>
var data = """ + cuisine_counts.to_json(orient="records") + """;

var margin = {top: 20, right: 30, bottom: 50, left: 100},
    width = 800 - margin.left - margin.right,
    height = 600 - margin.top - margin.bottom;

// Create an SVG canvas
var svg = d3.select("#d3-container")
    .append("svg")
    .attr("width", width + margin.left + margin.right)
    .attr("height", height + margin.top + margin.bottom)
    .append("g")
    .attr("transform", "translate(" + margin.left + "," + margin.top + ")");

// Color scale
var color = d3.scaleSequential(d3.interpolateBlues)
    .domain([0, d3.max(data, function(d) { return d.Count; })]);

// Create a scale for the x-axis
var x = d3.scaleLinear()
    .domain([0, d3.max(data, function(d) { return d.Count; })])
    .range([0, width]);

// Create a scale for the y-axis
var y = d3.scaleBand()
    .domain(data.map(function(d) { return d.Cuisines; }))
    .range([0, height])
    .padding(0.1);

// Append rectangles for the bar chart
svg.selectAll(".bar")
    .data(data)
    .enter().append("rect")
    .attr("class", "bar")
    .attr("x", 0)
    .attr("y", function(d) { return y(d.Cuisines); })
    .attr("width", function(d) { return x(d.Count); })
    .attr("height", y.bandwidth())
    .attr("fill", function(d) { return color(d.Count); });

// Append x and y axis
svg.append("g")
    .attr("class", "x-axis")
    .attr("transform", "translate(0," + height + ")")
    .call(d3.axisBottom(x))
    .append("text")
    .attr("x", width/2)
    .attr("y", 35)
    .attr("fill", "#000")
    .attr("font-weight", "bold")
    .text("Count");

svg.append("g")
    .attr("class", "y-axis")
    .call(d3.axisLeft(y));

// Title
svg.append("text")
    .attr("x", width / 2)
    .attr("y", -10)
    .attr("text-anchor", "middle")
    .attr("font-size", "24px")
    .attr("font-weight", "bold")
    .text("Cuisines Distribution");
</script>
"""

display(HTML(enhanced_cuisine_distribution_code))

In [None]:
# Frequency table of the 'Overall Rating' column, most common first
rating_counts = (
    data['Overall Rating']
    .value_counts()
    .rename_axis('Overall Rating')
    .reset_index(name='Count')
)

# One {'Overall Rating': ..., 'Count': ...} dict per row for the D3 chart
rating_counts_list = rating_counts.to_dict(orient='records')
rating_counts_list


[{'Overall Rating': 3.5, 'Count': 35},
 {'Overall Rating': 3.0, 'Count': 34},
 {'Overall Rating': 2.5, 'Count': 32},
 {'Overall Rating': 5.0, 'Count': 29},
 {'Overall Rating': 4.0, 'Count': 26},
 {'Overall Rating': 2.0, 'Count': 21},
 {'Overall Rating': 1.5, 'Count': 11},
 {'Overall Rating': 4.5, 'Count': 6},
 {'Overall Rating': 1.0, 'Count': 6}]

**Overall Rating Distribution Visualization**

In [None]:
# Vertical D3 (v5) bar chart of how often each overall rating occurs.
# Two fixes relative to passing str(rating_counts_list) straight through:
#  * rows are sorted by rating, because value_counts() orders by frequency and the band
#    scale keeps input order -- the x-axis would otherwise read 3.5, 3.0, 2.5, 5.0, ...
#  * rows are serialised as JSON, since a Python repr is not guaranteed to be valid
#    JavaScript (None, NaN and booleans are not).
rating_distribution_code = """
<!DOCTYPE html>
<script src="https://d3js.org/d3.v5.min.js"></script>
<div id="d3-rating-container"></div>

<script>
var data = """ + rating_counts.sort_values('Overall Rating').to_json(orient="records") + """;

var margin = {top: 20, right: 30, bottom: 50, left: 60},
    width = 800 - margin.left - margin.right,
    height = 600 - margin.top - margin.bottom;

// Create an SVG canvas
var svg = d3.select("#d3-rating-container")
    .append("svg")
    .attr("width", width + margin.left + margin.right)
    .attr("height", height + margin.top + margin.bottom)
    .append("g")
    .attr("transform", "translate(" + margin.left + "," + margin.top + ")");

// Color scale
var color = d3.scaleSequential(d3.interpolateGreens)
    .domain([0, d3.max(data, function(d) { return d.Count; })]);

// Create a scale for the x-axis (bands follow the ascending rating order of `data`)
var x = d3.scaleBand()
    .domain(data.map(function(d) { return d['Overall Rating']; }))
    .range([0, width])
    .padding(0.1);

// Create a scale for the y-axis
var y = d3.scaleLinear()
    .domain([0, d3.max(data, function(d) { return d.Count; })])
    .range([height, 0]);

// Append one rectangle per rating value
svg.selectAll(".bar")
    .data(data)
    .enter().append("rect")
    .attr("class", "bar")
    .attr("x", function(d) { return x(d['Overall Rating']); })
    .attr("y", function(d) { return y(d.Count); })
    .attr("width", x.bandwidth())
    .attr("height", function(d) { return height - y(d.Count); })
    .attr("fill", function(d) { return color(d.Count); });

// Append x and y axis
svg.append("g")
    .attr("class", "x-axis")
    .attr("transform", "translate(0," + height + ")")
    .call(d3.axisBottom(x))
    .append("text")
    .attr("x", width/2)
    .attr("y", 35)
    .attr("fill", "#000")
    .attr("font-weight", "bold")
    .text("Overall Rating");

svg.append("g")
    .attr("class", "y-axis")
    .call(d3.axisLeft(y))
    .append("text")
    .attr("x", -height/2)
    .attr("y", -40)
    .attr("transform", "rotate(-90)")
    .attr("fill", "#000")
    .attr("font-weight", "bold")
    .text("Count");

// Title
svg.append("text")
    .attr("x", width / 2)
    .attr("y", -10)
    .attr("text-anchor", "middle")
    .attr("font-size", "24px")
    .attr("font-weight", "bold")
    .text("Overall Rating Distribution");
</script>
"""

display(HTML(rating_distribution_code))

In [None]:
# Frequency table of the 'Gender' column, most common first
gender_counts = (
    data['Gender']
    .value_counts()
    .rename_axis('Gender')
    .reset_index(name='Count')
)

# One {'Gender': ..., 'Count': ...} dict per row for the D3 chart
gender_counts_list = gender_counts.to_dict(orient='records')
gender_counts_list


[{'Gender': 'Male', 'Count': 118}, {'Gender': 'Female', 'Count': 82}]

**Gender Distribution Pie Chart**

In [None]:
# D3 (v5) pie chart of the gender split.
# Changes relative to the straightforward template:
#  * a strip of `titleHeight` pixels is reserved above the pie; previously the title
#    baseline (y = -280) sat inside the pie, whose top edge was at y = -290;
#  * slice labels are centred on their anchor point;
#  * rows are embedded as JSON rather than a Python repr, which is not guaranteed to be
#    valid JavaScript (None, NaN and booleans are not).
gender_distribution_code = """
<!DOCTYPE html>
<script src="https://d3js.org/d3.v5.min.js"></script>
<div id="d3-gender-container"></div>

<script>
var data = """ + gender_counts.to_json(orient="records") + """;

var width = 600,
    height = 600,
    titleHeight = 40,
    radius = Math.min(width, height - titleHeight) / 2;

var color = d3.scaleOrdinal()
    .domain(data.map(function(d) { return d.Gender; }))
    .range(["#1f77b4", "#ff7f0e"]);

// Create an SVG canvas; the group origin is the pie centre, placed below the title strip
var svg = d3.select("#d3-gender-container")
    .append("svg")
    .attr("width", width)
    .attr("height", height)
    .append("g")
    .attr("transform", "translate(" + width / 2 + "," + (titleHeight + (height - titleHeight) / 2) + ")");

var pie = d3.pie()
    .sort(null)
    .value(function(d) { return d.Count; });

var arc = d3.arc()
    .outerRadius(radius - 10)
    .innerRadius(0);

// Zero-width arc used only to compute where each label goes
var labelArc = d3.arc()
    .outerRadius(radius - 40)
    .innerRadius(radius - 40);

var g = svg.selectAll(".arc")
    .data(pie(data))
    .enter().append("g")
    .attr("class", "arc");

g.append("path")
    .attr("d", arc)
    .style("fill", function(d) { return color(d.data.Gender); });

g.append("text")
    .attr("transform", function(d) { return "translate(" + labelArc.centroid(d) + ")"; })
    .attr("dy", ".35em")
    .attr("text-anchor", "middle")
    .text(function(d) { return d.data.Gender; });

// Title, drawn inside the reserved strip above the pie
svg.append("text")
    .attr("x", 0)
    .attr("y", -(height - titleHeight) / 2 - 12)
    .attr("text-anchor", "middle")
    .attr("font-size", "24px")
    .attr("font-weight", "bold")
    .text("Gender Distribution");
</script>
"""

display(HTML(gender_distribution_code))

In [None]:
# Frequency table of the 'Budget' column, most common first
budget_counts = (
    data['Budget']
    .value_counts()
    .rename_axis('Budget')
    .reset_index(name='Count')
)

# One {'Budget': ..., 'Count': ...} dict per row for the D3 chart
budget_counts_list = budget_counts.to_dict(orient='records')
budget_counts_list

[{'Budget': 4, 'Count': 63},
 {'Budget': 5, 'Count': 62},
 {'Budget': 3, 'Count': 61},
 {'Budget': 1, 'Count': 10},
 {'Budget': 2, 'Count': 4}]

**Budget Distribution Bar Chart**

In [None]:
# Vertical D3 (v5) bar chart of how many respondents fall in each budget level.
# Two fixes relative to passing str(budget_counts_list) straight through:
#  * rows are sorted by budget level, because value_counts() orders by frequency and the
#    band scale keeps input order -- the x-axis would otherwise read 4, 5, 3, 1, 2;
#  * rows are serialised as JSON, since a Python repr is not guaranteed to be valid
#    JavaScript (None, NaN and booleans are not).
budget_distribution_code = """
<!DOCTYPE html>
<script src="https://d3js.org/d3.v5.min.js"></script>
<div id="d3-budget-container"></div>

<script>
var data = """ + budget_counts.sort_values('Budget').to_json(orient="records") + """;

var margin = {top: 20, right: 30, bottom: 50, left: 60},
    width = 800 - margin.left - margin.right,
    height = 600 - margin.top - margin.bottom;

// Create an SVG canvas
var svg = d3.select("#d3-budget-container")
    .append("svg")
    .attr("width", width + margin.left + margin.right)
    .attr("height", height + margin.top + margin.bottom)
    .append("g")
    .attr("transform", "translate(" + margin.left + "," + margin.top + ")");

// Color scale
var color = d3.scaleOrdinal(d3.schemeCategory10);

// Create a scale for the x-axis (bands follow the ascending budget order of `data`)
var x = d3.scaleBand()
    .domain(data.map(function(d) { return d.Budget; }))
    .range([0, width])
    .padding(0.1);

// Create a scale for the y-axis
var y = d3.scaleLinear()
    .domain([0, d3.max(data, function(d) { return d.Count; })])
    .range([height, 0]);

// Append rectangles for the bar chart
svg.selectAll(".bar")
    .data(data)
    .enter().append("rect")
    .attr("class", "bar")
    .attr("x", function(d) { return x(d.Budget); })
    .attr("y", function(d) { return y(d.Count); })
    .attr("width", x.bandwidth())
    .attr("height", function(d) { return height - y(d.Count); })
    .attr("fill", function(d) { return color(d.Budget); });

// Append x and y axis
svg.append("g")
    .attr("class", "x-axis")
    .attr("transform", "translate(0," + height + ")")
    .call(d3.axisBottom(x))
    .append("text")
    .attr("x", width/2)
    .attr("y", 35)
    .attr("fill", "#000")
    .attr("font-weight", "bold")
    .text("Budget");

svg.append("g")
    .attr("class", "y-axis")
    .call(d3.axisLeft(y))
    .append("text")
    .attr("x", -height/2)
    .attr("y", -40)
    .attr("transform", "rotate(-90)")
    .attr("fill", "#000")
    .attr("font-weight", "bold")
    .text("Count");

// Title
svg.append("text")
    .attr("x", width / 2)
    .attr("y", -10)
    .attr("text-anchor", "middle")
    .attr("font-size", "24px")
    .attr("font-weight", "bold")
    .text("Budget Distribution");
</script>
"""

display(HTML(budget_distribution_code))

In [None]:
# Frequency table of alcohol preference, most common first
alcohol_counts = (
    data['Alcohol ']  # the column header in the CSV really ends with a space
    .value_counts()
    .rename_axis('Alcohol Preference')
    .reset_index(name='Count')
)

# One {'Alcohol Preference': ..., 'Count': ...} dict per row for the D3 chart
alcohol_counts_list = alcohol_counts.to_dict(orient='records')
alcohol_counts_list


[{'Alcohol Preference': 'Never', 'Count': 88},
 {'Alcohol Preference': 'Often', 'Count': 61},
 {'Alcohol Preference': 'Socially', 'Count': 51}]

**Alcohol Preference Pie Chart**

In [None]:
# D3 (v5) pie chart of alcohol preference.
# Changes relative to the straightforward template:
#  * a strip of `titleHeight` pixels is reserved above the pie; previously the title
#    baseline (y = -230) sat inside the pie, whose top edge was at y = -240;
#  * slice labels are centred on their anchor point;
#  * rows are embedded as JSON rather than a Python repr, which is not guaranteed to be
#    valid JavaScript (None, NaN and booleans are not).
alcohol_distribution_code = """
<!DOCTYPE html>
<script src="https://d3js.org/d3.v5.min.js"></script>
<div id="d3-alcohol-container"></div>

<script>
var data = """ + alcohol_counts.to_json(orient="records") + """;

var width = 500,
    height = 500,
    titleHeight = 40,
    radius = Math.min(width, height - titleHeight) / 2;

var color = d3.scaleOrdinal()
    .domain(data.map(function(d) { return d['Alcohol Preference']; }))
    .range(["#1f77b4", "#ff7f0e", "#2ca02c"]);

// Create an SVG canvas; the group origin is the pie centre, placed below the title strip
var svg = d3.select("#d3-alcohol-container")
    .append("svg")
    .attr("width", width)
    .attr("height", height)
    .append("g")
    .attr("transform", "translate(" + width / 2 + "," + (titleHeight + (height - titleHeight) / 2) + ")");

var pie = d3.pie()
    .sort(null)
    .value(function(d) { return d.Count; });

var arc = d3.arc()
    .outerRadius(radius - 10)
    .innerRadius(0);

// Zero-width arc used only to compute where each label goes
var labelArc = d3.arc()
    .outerRadius(radius - 40)
    .innerRadius(radius - 40);

var g = svg.selectAll(".arc")
    .data(pie(data))
    .enter().append("g")
    .attr("class", "arc");

g.append("path")
    .attr("d", arc)
    .style("fill", function(d) { return color(d.data['Alcohol Preference']); });

g.append("text")
    .attr("transform", function(d) { return "translate(" + labelArc.centroid(d) + ")"; })
    .attr("dy", ".35em")
    .attr("text-anchor", "middle")
    .text(function(d) { return d.data['Alcohol Preference']; });

// Title, drawn inside the reserved strip above the pie
svg.append("text")
    .attr("x", 0)
    .attr("y", -(height - titleHeight) / 2 - 12)
    .attr("text-anchor", "middle")
    .attr("font-size", "20px")
    .attr("font-weight", "bold")
    .text("Alcohol Preference Distribution");
</script>
"""

display(HTML(alcohol_distribution_code))

In [None]:
# Mean 'Overall Rating' per gender, with 'Gender' kept as an ordinary column
gender_avg_rating = data.groupby('Gender', as_index=False)['Overall Rating'].mean()

gender_avg_rating

Unnamed: 0,Gender,Overall Rating
0,Female,3.335366
1,Male,3.148305


**Bivariate Visualization: Gender vs. Average Overall Rating (Horizontal Bar Chart)**

In [None]:
# Horizontal D3 (v5) bar chart comparing the mean overall rating of each gender.
# The rows are embedded as JSON (DataFrame.to_json) instead of str(list-of-dicts): a Python
# repr is only accidentally valid JavaScript and breaks on None, NaN or booleans.
gender_avg_rating_horizontal_code = """
<!DOCTYPE html>
<script src="https://d3js.org/d3.v5.min.js"></script>
<div id="d3-gender-rating-horizontal-container"></div>

<script>
var data = """ + gender_avg_rating.to_json(orient="records") + """;

var margin = {top: 20, right: 60, bottom: 30, left: 100},
    width = 400 - margin.left - margin.right,
    height = 300 - margin.top - margin.bottom;

// Create an SVG canvas
var svg = d3.select("#d3-gender-rating-horizontal-container")
    .append("svg")
    .attr("width", width + margin.left + margin.right)
    .attr("height", height + margin.top + margin.bottom)
    .append("g")
    .attr("transform", "translate(" + margin.left + "," + margin.top + ")");

// Color scale
var color = d3.scaleOrdinal()
    .domain(data.map(function(d) { return d.Gender; }))
    .range(["#1f77b4", "#ff7f0e"]);

// Create a scale for the y-axis
var y = d3.scaleBand()
    .domain(data.map(function(d) { return d.Gender; }))
    .range([0, height])
    .padding(0.1);

// Create a scale for the x-axis
var x = d3.scaleLinear()
    .domain([0, d3.max(data, function(d) { return d['Overall Rating']; })])
    .range([0, width]);

// Append rectangles for the bar chart
svg.selectAll(".bar")
    .data(data)
    .enter().append("rect")
    .attr("class", "bar")
    .attr("y", function(d) { return y(d.Gender); })
    .attr("x", 0)
    .attr("height", y.bandwidth())
    .attr("width", function(d) { return x(d['Overall Rating']); })
    .attr("fill", function(d) { return color(d.Gender); });

// Append x and y axis
svg.append("g")
    .attr("class", "x-axis")
    .attr("transform", "translate(0," + height + ")")
    .call(d3.axisBottom(x))
    .append("text")
    .attr("x", width/2)
    .attr("y", 25)
    .attr("fill", "#000")
    .attr("font-weight", "bold")
    .text("Average Overall Rating");

svg.append("g")
    .attr("class", "y-axis")
    .call(d3.axisLeft(y));

// Title
svg.append("text")
    .attr("x", width / 2)
    .attr("y", -10)
    .attr("text-anchor", "middle")
    .attr("font-size", "18px")
    .attr("font-weight", "bold")
    .text("Gender vs. Average Rating");
</script>
"""

display(HTML(gender_avg_rating_horizontal_code))

In [None]:
# Mean 'Overall Rating' per activity, with 'Activity' kept as an ordinary column
activity_avg_rating = data.groupby('Activity', as_index=False)['Overall Rating'].mean()

activity_avg_rating


Unnamed: 0,Activity,Overall Rating
0,Professional,3.44375
1,Student,3.079167


**Bivariate Visualization: Activity vs. Average Overall Rating (Horizontal Bar Chart)**

In [None]:
# Horizontal D3 (v5) bar chart comparing the mean overall rating of each activity group.
# The rows are embedded as JSON (DataFrame.to_json) instead of str(list-of-dicts): a Python
# repr is only accidentally valid JavaScript and breaks on None, NaN or booleans.
activity_avg_rating_horizontal_code = """
<!DOCTYPE html>
<script src="https://d3js.org/d3.v5.min.js"></script>
<div id="d3-activity-rating-horizontal-container"></div>

<script>
var data = """ + activity_avg_rating.to_json(orient="records") + """;

var margin = {top: 20, right: 60, bottom: 30, left: 120},
    width = 400 - margin.left - margin.right,
    height = 250 - margin.top - margin.bottom;

// Create an SVG canvas
var svg = d3.select("#d3-activity-rating-horizontal-container")
    .append("svg")
    .attr("width", width + margin.left + margin.right)
    .attr("height", height + margin.top + margin.bottom)
    .append("g")
    .attr("transform", "translate(" + margin.left + "," + margin.top + ")");

// Color scale
var color = d3.scaleOrdinal()
    .domain(data.map(function(d) { return d.Activity; }))
    .range(["#1f77b4", "#ff7f0e"]);

// Create a scale for the y-axis
var y = d3.scaleBand()
    .domain(data.map(function(d) { return d.Activity; }))
    .range([0, height])
    .padding(0.1);

// Create a scale for the x-axis
var x = d3.scaleLinear()
    .domain([0, d3.max(data, function(d) { return d['Overall Rating']; })])
    .range([0, width]);

// Append rectangles for the bar chart
svg.selectAll(".bar")
    .data(data)
    .enter().append("rect")
    .attr("class", "bar")
    .attr("y", function(d) { return y(d.Activity); })
    .attr("x", 0)
    .attr("height", y.bandwidth())
    .attr("width", function(d) { return x(d['Overall Rating']); })
    .attr("fill", function(d) { return color(d.Activity); });

// Append x and y axis
svg.append("g")
    .attr("class", "x-axis")
    .attr("transform", "translate(0," + height + ")")
    .call(d3.axisBottom(x))
    .append("text")
    .attr("x", width/2)
    .attr("y", 25)
    .attr("fill", "#000")
    .attr("font-weight", "bold")
    .text("Average Overall Rating");

svg.append("g")
    .attr("class", "y-axis")
    .call(d3.axisLeft(y));

// Title
svg.append("text")
    .attr("x", width / 2)
    .attr("y", -10)
    .attr("text-anchor", "middle")
    .attr("font-size", "18px")
    .attr("font-weight", "bold")
    .text("Activity vs. Average Rating");
</script>
"""

display(HTML(activity_avg_rating_horizontal_code))

**Box plot: Overall Rating across different Activities**

In [None]:
# D3 (v5) box plot of 'Overall Rating' for each activity group.
# Fixes relative to the straightforward template:
#  * whiskers end at the most extreme observations that lie inside the Tukey fences
#    (q1 - 1.5*IQR, q3 + 1.5*IQR). Drawing them at the fences themselves put them at
#    values nobody gave, and outside the y domain [0, max rating] whenever a fence
#    exceeded the largest rating or dropped below zero;
#  * a vertical stem now joins the two whisker caps through the box;
#  * rows are embedded as JSON rather than a Python repr, which is not guaranteed to be
#    valid JavaScript (None, NaN and booleans are not).
box_plot_code = """
<!DOCTYPE html>
<script src="https://d3js.org/d3.v5.min.js"></script>
<div id="d3-box-plot-container"></div>

<script>
var data = """ + data[['Overall Rating', 'Activity']].dropna().to_json(orient="records") + """;

var margin = {top: 20, right: 50, bottom: 30, left: 50},
    width = 450 - margin.left - margin.right,
    height = 300 - margin.top - margin.bottom;

var activities = Array.from(new Set(data.map(function(d) { return d.Activity; })));

var x = d3.scaleBand()
          .domain(activities)
          .rangeRound([0, width])
          .padding(0.2);

var y = d3.scaleLinear()
          .domain([0, d3.max(data, function(d) { return d['Overall Rating']; })])
          .rangeRound([height, 0]);

var svg = d3.select("#d3-box-plot-container")
            .append("svg")
            .attr("width", width + margin.left + margin.right)
            .attr("height", height + margin.top + margin.bottom)
            .append("g")
            .attr("transform", "translate(" + margin.left + "," + margin.top + ")");

// Box plot: one box per activity
activities.forEach(function(activity) {
    // d3.quantile expects the values sorted in ascending order
    var activityData = data
        .filter(function(d) { return d.Activity == activity; })
        .map(function(d) { return d['Overall Rating']; })
        .sort(d3.ascending);
    var q1 = d3.quantile(activityData, .25);
    var median = d3.quantile(activityData, .5);
    var q3 = d3.quantile(activityData, .75);
    var interQuantileRange = q3 - q1;
    var lowerFence = q1 - 1.5 * interQuantileRange;
    var upperFence = q3 + 1.5 * interQuantileRange;
    // Whisker ends: extreme observations that are still inside the fences
    var min = d3.min(activityData.filter(function(v) { return v >= lowerFence; }));
    var max = d3.max(activityData.filter(function(v) { return v <= upperFence; }));
    var center = x(activity) + x.bandwidth() / 2;

    // Whisker stem (drawn first so the box is painted over its middle part)
    svg.append("line")
       .attr("x1", center)
       .attr("x2", center)
       .attr("y1", y(min))
       .attr("y2", y(max))
       .attr("stroke", "black");

    // Box
    svg.append("rect")
       .attr("x", x(activity))
       .attr("y", y(q3))
       .attr("width", x.bandwidth())
       .attr("height", (y(q1) - y(q3)))
       .attr("fill", "#69b3a2");

    // Line for min
    svg.append("line")
       .attr("x1", x(activity))
       .attr("x2", x(activity) + x.bandwidth())
       .attr("y1", y(min))
       .attr("y2", y(min))
       .attr("stroke", "black");

    // Line for median
    svg.append("line")
       .attr("x1", x(activity))
       .attr("x2", x(activity) + x.bandwidth())
       .attr("y1", y(median))
       .attr("y2", y(median))
       .attr("stroke", "black");

    // Line for max
    svg.append("line")
       .attr("x1", x(activity))
       .attr("x2", x(activity) + x.bandwidth())
       .attr("y1", y(max))
       .attr("y2", y(max))
       .attr("stroke", "black");
});

// X and Y axes
svg.append("g")
   .attr("transform", "translate(0," + height + ")")
   .call(d3.axisBottom(x));

svg.append("g")
   .call(d3.axisLeft(y));

// Title
svg.append("text")
   .attr("x", width / 2)
   .attr("y", -10)
   .attr("text-anchor", "middle")
   .attr("font-size", "16px")
   .attr("font-weight", "bold")
   .text("Box plot of Overall Rating across Activities");
</script>
"""

display(HTML(box_plot_code))

In [None]:
# Mean 'Overall Rating' for every (Gender, Activity) pair, keys kept as ordinary columns
gender_activity_avg_rating = (
    data
    .groupby(['Gender', 'Activity'], as_index=False)['Overall Rating']
    .mean()
)

gender_activity_avg_rating

Unnamed: 0,Gender,Activity,Overall Rating
0,Female,Professional,3.45
1,Female,Student,3.22619
2,Male,Professional,3.4375
3,Male,Student,3.0


**Multivariate Visualization: Gender, Activity, and Average Overall Rating (Grouped Bar Chart)**

In [None]:
# Grouped D3 (v5) bar chart: activities on the x-axis, one bar per gender inside each group,
# bar height = mean overall rating.
# Fixes relative to the straightforward template:
#  * the legend is drawn in the right margin; at x = width - 19 it sat on top of the
#    right-most bar;
#  * the bars now carry the "bar" class that the selection asks for;
#  * rows are embedded as JSON rather than a Python repr, which is not guaranteed to be
#    valid JavaScript (None, NaN and booleans are not).
gender_activity_avg_rating_grouped_code = """
<!DOCTYPE html>
<script src="https://d3js.org/d3.v5.min.js"></script>
<div id="d3-gender-activity-rating-grouped-container"></div>

<script>
var data = """ + gender_activity_avg_rating.to_json(orient="records") + """;

var margin = {top: 20, right: 60, bottom: 50, left: 100},
    width = 450 - margin.left - margin.right,
    height = 300 - margin.top - margin.bottom;

var activities = Array.from(new Set(data.map(function(d) { return d.Activity; })));

// Create an SVG canvas
var svg = d3.select("#d3-gender-activity-rating-grouped-container")
    .append("svg")
    .attr("width", width + margin.left + margin.right)
    .attr("height", height + margin.top + margin.bottom)
    .append("g")
    .attr("transform", "translate(" + margin.left + "," + margin.top + ")");

// Color scale
var color = d3.scaleOrdinal()
    .domain(["Male", "Female"])
    .range(["#1f77b4", "#ff7f0e"]);

// Outer band scale: one band per activity
var x0 = d3.scaleBand()
    .domain(activities)
    .rangeRound([0, width])
    .paddingInner(0.1);

// Inner band scale: positions each gender inside an activity band
var x1 = d3.scaleBand()
    .domain(["Male", "Female"])
    .rangeRound([0, x0.bandwidth()])
    .padding(0.05);

var y = d3.scaleLinear()
    .domain([0, d3.max(data, function(d) { return d['Overall Rating']; })])
    .nice()
    .rangeRound([height, 0]);

// Append rectangles for the bar chart
var bars = svg.selectAll(".bar")
    .data(data)
    .enter().append("rect")
    .attr("class", "bar")
    .attr("x", function(d) { return x0(d.Activity) + x1(d.Gender); })
    .attr("y", function(d) { return y(d['Overall Rating']); })
    .attr("width", x1.bandwidth())
    .attr("height", function(d) { return height - y(d['Overall Rating']); })
    .attr("fill", function(d) { return color(d.Gender); });

// Append x and y axis
svg.append("g")
    .attr("class", "x-axis")
    .attr("transform", "translate(0," + height + ")")
    .call(d3.axisBottom(x0));

svg.append("g")
    .attr("class", "y-axis")
    .call(d3.axisLeft(y).ticks(5, "s"))
    .append("text")
    .attr("x", 2)
    .attr("y", y(y.ticks(5).pop()))
    .attr("dy", "0.35em")
    .attr("text-anchor", "start")
    .attr("fill", "#000")
    .attr("font-weight", "bold")
    .text("Average Overall Rating");

// Title
svg.append("text")
    .attr("x", width / 2)
    .attr("y", -10)
    .attr("text-anchor", "middle")
    .attr("font-size", "18px")
    .attr("font-weight", "bold")
    .text("Activity, Gender vs. Average Rating");

// Legend, right-aligned to the outer edge of the right margin so it never covers a bar
var legend = svg.append("g")
    .attr("font-size", 10)
    .attr("text-anchor", "end")
    .selectAll("g")
    .data(["Male", "Female"].slice().reverse())
    .enter().append("g")
    .attr("transform", function(d, i) { return "translate(0," + i * 20 + ")"; });

legend.append("rect")
    .attr("x", width + margin.right - 19)
    .attr("width", 19)
    .attr("height", 19)
    .attr("fill", color);

legend.append("text")
    .attr("x", width + margin.right - 24)
    .attr("y", 9.5)
    .attr("dy", "0.32em")
    .text(function(d) { return d; });
</script>
"""

display(HTML(gender_activity_avg_rating_grouped_code))

**Multivariate Visualization: Scatter Plot for Overall Rating and Food Rating based on Gender**

In [None]:
# D3 (v5) scatter plot of 'Overall Rating' against 'Food Rating', coloured by gender.
# Fixes relative to the straightforward template:
#  * both ratings take only a handful of discrete values, so many respondents share the
#    same coordinates and opaque circles hid everything but the last one drawn. Points
#    are now semi-transparent and shifted a few pixels left/right by gender, so overlap
#    shows up as darker colour and both genders stay visible;
#  * the legend moved into a widened right margin (the plotting area keeps its size);
#    it used to cover the top-right corner of the plot;
#  * rows are embedded as JSON rather than a Python repr, which is not guaranteed to be
#    valid JavaScript (None, NaN and booleans are not).
scatter_plot_code = """
<!DOCTYPE html>
<script src="https://d3js.org/d3.v5.min.js"></script>
<div id="d3-scatter-plot-container"></div>

<script>
var data = """ + data[['Overall Rating', 'Food Rating', 'Gender']].dropna().to_json(orient="records") + """;

var margin = {top: 20, right: 80, bottom: 40, left: 50},
    width = 460 - margin.left - margin.right,
    height = 300 - margin.top - margin.bottom;

var x = d3.scaleLinear()
          .domain([d3.min(data, function(d) { return d['Food Rating']; }), d3.max(data, function(d) { return d['Food Rating']; })])
          .range([0, width]);

var y = d3.scaleLinear()
          .domain([d3.min(data, function(d) { return d['Overall Rating']; }), d3.max(data, function(d) { return d['Overall Rating']; })])
          .range([height, 0]);

var color = d3.scaleOrdinal()
              .domain(["Male", "Female"])
              .range(["#1f77b4", "#ff7f0e"]);

// Horizontal pixel offset per gender, so the two groups sit side by side at shared coordinates
function dodge(gender) { return gender === "Male" ? -4 : 4; }

var svg = d3.select("#d3-scatter-plot-container")
            .append("svg")
            .attr("width", width + margin.left + margin.right)
            .attr("height", height + margin.top + margin.bottom)
            .append("g")
            .attr("transform", "translate(" + margin.left + "," + margin.top + ")");

// Scatter plot circles
svg.selectAll("circle")
   .data(data)
   .enter().append("circle")
   .attr("cx", function(d) { return x(d['Food Rating']) + dodge(d.Gender); })
   .attr("cy", function(d) { return y(d['Overall Rating']); })
   .attr("r", 4)
   .attr("fill", function(d) { return color(d.Gender); })
   .attr("fill-opacity", 0.35);

// X-axis
svg.append("g")
   .attr("transform", "translate(0," + height + ")")
   .call(d3.axisBottom(x))
   .append("text")
   .attr("x", width / 2)
   .attr("y", 30)
   .attr("fill", "#000")
   .attr("font-weight", "bold")
   .text("Food Rating");

// Y-axis
svg.append("g")
   .call(d3.axisLeft(y))
   .append("text")
   .attr("x", -height / 2)
   .attr("y", -35)
   .attr("transform", "rotate(-90)")
   .attr("fill", "#000")
   .attr("font-weight", "bold")
   .text("Overall Rating");

// Legend, right-aligned to the outer edge of the right margin
var legend = svg.append("g")
                .attr("font-size", 10)
                .attr("text-anchor", "end")
                .selectAll("g")
                .data(["Male", "Female"].slice().reverse())
                .enter().append("g")
                .attr("transform", function(d, i) { return "translate(0," + i * 20 + ")"; });

legend.append("rect")
      .attr("x", width + margin.right - 19)
      .attr("width", 19)
      .attr("height", 19)
      .attr("fill", color);

legend.append("text")
      .attr("x", width + margin.right - 24)
      .attr("y", 9.5)
      .attr("dy", "0.32em")
      .text(function(d) { return d; });
</script>
"""

display(HTML(scatter_plot_code))

**Outlier Detection**

In [None]:
# Flag outliers in each rating column with the 1.5 * IQR rule

columns_to_check = ['Overall Rating', 'Food Rating', 'Service Rating']
outliers = {}

for column in columns_to_check:
    values = data[column]

    # Quartiles and the fences derived from them
    Q1, Q3 = values.quantile(0.25), values.quantile(0.75)
    IQR = Q3 - Q1
    lower_bound, upper_bound = Q1 - 1.5 * IQR, Q3 + 1.5 * IQR

    # A value is an outlier when it falls strictly outside either fence
    is_outlier = (values < lower_bound) | (values > upper_bound)
    outliers[column] = {
        "Lower Bound": lower_bound,
        "Upper Bound": upper_bound,
        "Outliers": values[is_outlier].values,
    }

outliers

{'Overall Rating': {'Lower Bound': 0.25,
  'Upper Bound': 6.25,
  'Outliers': array([], dtype=float64)},
 'Food Rating': {'Lower Bound': -2.5,
  'Upper Bound': 9.5,
  'Outliers': array([], dtype=int64)},
 'Service Rating': {'Lower Bound': -2.5,
  'Upper Bound': 9.5,
  'Outliers': array([], dtype=int64)}}

**Feature Engineering**

In [None]:
# Derive two columns from the food and service scores:
#   'Average Rating'    - mean of the two scores
#   'Rating Difference' - food score minus service score

data['Average Rating'] = data['Food Rating'].add(data['Service Rating']).div(2)
data['Rating Difference'] = data['Food Rating'].sub(data['Service Rating'])

# Show the inputs next to the derived columns for the first few rows
data.loc[:, ['Food Rating', 'Service Rating', 'Average Rating', 'Rating Difference']].head()

Unnamed: 0,Food Rating,Service Rating,Average Rating,Rating Difference
0,5,4,4.5,1
1,1,1,1.0,0
2,5,5,5.0,0
3,3,1,2.0,2
4,2,4,3.0,-2


**Correlation Analysis**

In [None]:
# Pairwise Pearson correlations between the original and derived rating columns
rating_columns = [
    'Overall Rating',
    'Food Rating',
    'Service Rating',
    'Average Rating',
    'Rating Difference',
]
correlation_matrix = data.loc[:, rating_columns].corr(method='pearson')

correlation_matrix

Unnamed: 0,Overall Rating,Food Rating,Service Rating,Average Rating,Rating Difference
Overall Rating,1.0,0.709562,0.758532,1.0,-0.07829
Food Rating,0.709562,1.0,0.079056,0.709562,0.646928
Service Rating,0.758532,0.079056,1.0,0.758532,-0.709021
Average Rating,1.0,0.709562,0.758532,1.0,-0.07829
Rating Difference,-0.07829,0.646928,-0.709021,-0.07829,1.0


**Hypothesis Testing**

Gender-based t-test (Welch's, unequal variances) on Overall Rating

In [None]:
from scipy.stats import ttest_ind

# Overall ratings split into the two gender groups
male_ratings = data.loc[data['Gender'] == 'Male', 'Overall Rating']
female_ratings = data.loc[data['Gender'] == 'Female', 'Overall Rating']

# Two-sample t-test without assuming equal variances; missing ratings are ignored
gender_test = ttest_ind(
    male_ratings,
    female_ratings,
    equal_var=False,
    nan_policy='omit',
)
t_stat_gender, p_value_gender = gender_test

(t_stat_gender, p_value_gender)

(-1.1695643029807883, 0.24398092946288405)

Activity-based t-test (Welch's, unequal variances) on Overall Rating

In [None]:
# Overall ratings split into the two activity groups
professional_ratings = data.loc[data['Activity'] == 'Professional', 'Overall Rating']
student_ratings = data.loc[data['Activity'] == 'Student', 'Overall Rating']

# Two-sample t-test without assuming equal variances; missing ratings are ignored
activity_test = ttest_ind(
    professional_ratings,
    student_ratings,
    equal_var=False,
    nan_policy='omit',
)
t_stat_activity, p_value_activity = activity_test

(t_stat_activity, p_value_activity)

(2.3515456773510945, 0.01987187419347933)

**Concluding the Analysis**

From EDA, I've discovered:

Distribution patterns of ratings and other categorical features.

Multivariate relationships to understand how variables interact with each other.

No outliers: the IQR check found none in the Overall Rating, Food Rating, or Service Rating columns.

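New features (Average Rating and Rating Difference) that offer additional insights. Note that Average Rating correlates perfectly (1.0) with Overall Rating, so it carries no information beyond Overall Rating.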