In [1]:
import pandas as pd

# Load the datasets
red_wine = pd.read_csv('winequality-red.csv')
white_wine = pd.read_csv('winequality-white.csv')

# View the first few rows of each dataset
print("Red Wine Dataset Head:")
print(red_wine.head())
print("\nWhite Wine Dataset Head:")
print(white_wine.head())

# Check for missing values in each dataset
print("\nMissing Values in Red Wine Dataset:")
print(red_wine.isnull().sum())
print("\nMissing Values in White Wine Dataset:")
print(white_wine.isnull().sum())

# Add a 'type' column to each dataset
red_wine['type'] = 1
white_wine['type'] = 0

# Combine the datasets
combined_wine = pd.concat([red_wine, white_wine], ignore_index=True)

# Save the combined dataset to a new CSV file
combined_wine.to_csv('combined_winequality.csv', index=False)

print("\nCombined Dataset Head:")
print(combined_wine.head())


Red Wine Dataset Head:
  fixed acidity;"volatile acidity";"citric acid";"residual sugar";"chlorides";"free sulfur dioxide";"total sulfur dioxide";"density";"pH";"sulphates";"alcohol";"quality"
0   7.4;0.7;0;1.9;0.076;11;34;0.9978;3.51;0.56;9.4;5                                                                                                                     
1   7.8;0.88;0;2.6;0.098;25;67;0.9968;3.2;0.68;9.8;5                                                                                                                     
2  7.8;0.76;0.04;2.3;0.092;15;54;0.997;3.26;0.65;...                                                                                                                     
3  11.2;0.28;0.56;1.9;0.075;17;60;0.998;3.16;0.58...                                                                                                                     
4   7.4;0.7;0;1.9;0.076;11;34;0.9978;3.51;0.56;9.4;5                                                                           

In [3]:
combined_wine.head()


Unnamed: 0,"fixed acidity;""volatile acidity"";""citric acid"";""residual sugar"";""chlorides"";""free sulfur dioxide"";""total sulfur dioxide"";""density"";""pH"";""sulphates"";""alcohol"";""quality""",type
0,7.4;0.7;0;1.9;0.076;11;34;0.9978;3.51;0.56;9.4;5,1
1,7.8;0.88;0;2.6;0.098;25;67;0.9968;3.2;0.68;9.8;5,1
2,7.8;0.76;0.04;2.3;0.092;15;54;0.997;3.26;0.65;...,1
3,11.2;0.28;0.56;1.9;0.075;17;60;0.998;3.16;0.58...,1
4,7.4;0.7;0;1.9;0.076;11;34;0.9978;3.51;0.56;9.4;5,1


In [4]:
from tabulate import tabulate

# Display the combined DataFrame in a nicely formatted table
print("Combined Wine Dataset Head:")
print(tabulate(combined_wine.head(), headers='keys', tablefmt='pretty'))


Combined Wine Dataset Head:
+---+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------+
|   | fixed acidity;"volatile acidity";"citric acid";"residual sugar";"chlorides";"free sulfur dioxide";"total sulfur dioxide";"density";"pH";"sulphates";"alcohol";"quality" | type |
+---+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------+
| 0 |                                                            7.4;0.7;0;1.9;0.076;11;34;0.9978;3.51;0.56;9.4;5                                                             |  1   |
| 1 |                                                            7.8;0.88;0;2.6;0.098;25;67;0.9968;3.2;0.68;9.8;5                                                             |  1   |
| 2 |                                                    

In [5]:
combined_wine.head()

Unnamed: 0,"fixed acidity;""volatile acidity"";""citric acid"";""residual sugar"";""chlorides"";""free sulfur dioxide"";""total sulfur dioxide"";""density"";""pH"";""sulphates"";""alcohol"";""quality""",type
0,7.4;0.7;0;1.9;0.076;11;34;0.9978;3.51;0.56;9.4;5,1
1,7.8;0.88;0;2.6;0.098;25;67;0.9968;3.2;0.68;9.8;5,1
2,7.8;0.76;0.04;2.3;0.092;15;54;0.997;3.26;0.65;...,1
3,11.2;0.28;0.56;1.9;0.075;17;60;0.998;3.16;0.58...,1
4,7.4;0.7;0;1.9;0.076;11;34;0.9978;3.51;0.56;9.4;5,1


In [6]:
import pandas as pd

# Load the datasets with the correct delimiter and handle quotes
red_wine = pd.read_csv('winequality-red.csv', delimiter=';')
white_wine = pd.read_csv('winequality-white.csv', delimiter=';')

# View the first few rows of each dataset to ensure they are loaded correctly
print("Red Wine Dataset Head:")
print(red_wine.head())
print("\nWhite Wine Dataset Head:")
print(white_wine.head())

# Check for missing values in each dataset
print("\nMissing Values in Red Wine Dataset:")
print(red_wine.isnull().sum())
print("\nMissing Values in White Wine Dataset:")
print(white_wine.isnull().sum())

# Add a 'type' column to each dataset
red_wine['type'] = 1
white_wine['type'] = 0

# Combine the datasets
combined_wine = pd.concat([red_wine, white_wine], ignore_index=True)

# Save the combined dataset to a new CSV file
combined_wine.to_csv('combined_winequality.csv', index=False)

# Display the combined DataFrame in a nicely formatted table
print("\nCombined Dataset Head:")
print(combined_wine.head())



Red Wine Dataset Head:
   fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
0            7.4              0.70         0.00             1.9      0.076   
1            7.8              0.88         0.00             2.6      0.098   
2            7.8              0.76         0.04             2.3      0.092   
3           11.2              0.28         0.56             1.9      0.075   
4            7.4              0.70         0.00             1.9      0.076   

   free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
0                 11.0                  34.0   0.9978  3.51       0.56   
1                 25.0                  67.0   0.9968  3.20       0.68   
2                 15.0                  54.0   0.9970  3.26       0.65   
3                 17.0                  60.0   0.9980  3.16       0.58   
4                 11.0                  34.0   0.9978  3.51       0.56   

   alcohol  quality  
0      9.4        5  
1      9.8        5

In [7]:
import pandas as pd

# Load the datasets with the correct delimiter and handle quotes
red_wine = pd.read_csv('winequality-red.csv', delimiter=';')
white_wine = pd.read_csv('winequality-white.csv', delimiter=';')

# Get the number of instances in each dataset
red_wine_count = red_wine.shape[0]
white_wine_count = white_wine.shape[0]

print(f"Number of instances in red wine dataset: {red_wine_count}")
print(f"Number of instances in white wine dataset: {white_wine_count}")



Number of instances in red wine dataset: 1599
Number of instances in white wine dataset: 4898
