# Salary Dataset Analysis
This project uses several Python libraries (pandas, matplotlib, and seaborn) to analyze how salary relates to years of experience.
1. Import libraries needed for project
2. Read CSV data extracted from Kaggle "Salary_Dataset"
3. Create and display the scatter plot
4. View descriptive information of the dataset
5. Discuss findings (see README file)

In [None]:
# Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the salary CSV into a DataFrame and print a summary of its columns
csv_path = 'salary_dataset.csv'
salary_data = pd.read_csv(csv_path)
salary_data.info()

In [None]:
# Rename columns for readability:
#   'Unnamed: 0'      -> 'PersonID'
#   'YearsExperience' -> 'Years_Experience'
column_renames = {
    'Unnamed: 0': 'PersonID',
    'YearsExperience': 'Years_Experience',
}
salary_data.rename(columns=column_renames, inplace=True)

# Print the column summary again to confirm the new names
salary_data.info()

In [None]:
# Create scatter plot chart of experience vs. salary with a fitted regression line
point_color = '#1f77b4'
line_color = '#000000'

# Define variables first so the statistics and the plot use the same columns
x_variable = 'Years_Experience'
y_variable = 'Salary'

# Define correlation and r-squared values
# (DataFrame.corr() defaults to Pearson; [0, 1] is the off-diagonal x-vs-y entry)
review_corr = salary_data[[x_variable, y_variable]].corr().iloc[0, 1]
review_r2 = review_corr ** 2

# Create scatter plot
sns.lmplot(
    data=salary_data,
    x=x_variable,
    y=y_variable,
    height=7,
    aspect=1.5,
    scatter_kws={'color': point_color, 'alpha': 0.6},
    line_kws={'color': line_color},
)

# Generate scatter plot labels
plt.title(f'{x_variable} vs {y_variable}\n(r= {review_corr:.2f}, r² = {review_r2:.2f})', pad=20)
plt.xlabel(x_variable)
plt.ylabel(y_variable)

# Display scatter plot
# tight_layout must be *called*; a bare `plt.tight_layout` is a no-op expression
plt.tight_layout()
plt.show()

In [None]:
# Supplementary information: descriptive statistics for the two analyzed columns
summary_columns = ['Years_Experience', 'Salary']
salary_data[summary_columns].describe()