In [None]:
import requests
import pandas as pd
import time

def make_github_request(url, headers, params=None):
    """Make a GitHub API request with rate limit handling"""
    while True:
        try:
            response = requests.get(url, headers=headers, params=params)
            if response.status_code == 200:
                return response.json()
            elif response.status_code == 403:  # Rate limit exceeded
                reset_time = int(response.headers.get('X-RateLimit-Reset', 0))
                wait_time = max(reset_time - time.time(), 0)
                print(f"Rate limit exceeded. Waiting {wait_time:.0f} seconds...")
                time.sleep(wait_time + 1)
            else:
                print(f"Error {response.status_code}: {response.text}")
                return None
        except Exception as e:
            print(f"Request error: {str(e)}")
            return None

def clean_company_name(company):
    """Clean company names according to specifications"""
    if pd.isna(company) or company is None:
        return ""
    company = str(company).strip()
    if company.startswith('@'):
        company = company[1:]
    return company.upper()

def safe_get(data, *keys, default=''):
    """Safely get nested dictionary values"""
    for key in keys:
        data = data.get(key, None)
        if data is None:
            return default
    return data

def get_github_data():
    """Fetch GitHub data and create CSV files"""
    token = input("Please enter your GitHub token: ").strip()
    if not token:
        print("GitHub token is required.")
        return None, None

    headers = {
        'Authorization': f'token {token}',
        'Accept': 'application/vnd.github.v3+json'
    }

    print("🔍 Searching for users in Hyderabad...")

    users_data = []
    repos_data = []
    search_url = "https://api.github.com/search/users"
    params = {
        'q': 'location:Hyderabad followers:>50',
        'per_page': 100
    }

    page = 1  # Start pagination for user search

    while True:
        params['page'] = page
        search_results = make_github_request(search_url, headers, params)

        if not search_results:
            print("No more results or error encountered in user search.")
            break

        items = search_results.get('items', [])
        if not items:
            break  # No more items to paginate

        print(f"📊 Processing page {page} with {len(items)} users found...")

        for user in items:
            try:
                print(f"\n👤 Fetching data for user: {user.get('login', 'unknown')}")

                # Get user details
                user_data = make_github_request(user['url'], headers)
                if not user_data:
                    print(f"⚠️ Could not fetch details for user {user.get('login', 'unknown')}. Skipping.")
                    continue

                # Append user details to users_data
                users_data.append({
                    'login': safe_get(user_data, 'login'),
                    'name': safe_get(user_data, 'name'),
                    'company': clean_company_name(safe_get(user_data, 'company')),
                    'location': safe_get(user_data, 'location'),
                    'email': safe_get(user_data, 'email'),
                    'hireable': str(safe_get(user_data, 'hireable')).lower(),
                    'bio': safe_get(user_data, 'bio'),
                    'public_repos': safe_get(user_data, 'public_repos', default=0),
                    'followers': safe_get(user_data, 'followers', default=0),
                    'following': safe_get(user_data, 'following', default=0),
                    'created_at': safe_get(user_data, 'created_at')
                })

                # Get repositories
                repos_url = f"https://api.github.com/users/{user['login']}/repos"
                repo_page = 1
                while True:
                    repo_params = {
                        'sort': 'pushed',
                        'direction': 'desc',
                        'per_page': 100,
                        'page': repo_page
                    }
                    repos = make_github_request(repos_url, headers, repo_params)
                    if not repos:
                        print(f"No more repositories for {user.get('login', 'unknown')}.")
                        break

                    for repo in repos:
                        repos_data.append({
                            'login': safe_get(user_data, 'login'),
                            'full_name': safe_get(repo, 'full_name'),
                            'created_at': safe_get(repo, 'created_at'),
                            'stargazers_count': safe_get(repo, 'stargazers_count', default=0),
                            'watchers_count': safe_get(repo, 'watchers_count', default=0),
                            'language': safe_get(repo, 'language'),
                            'has_projects': str(safe_get(repo, 'has_projects')).lower(),
                            'has_wiki': str(safe_get(repo, 'has_wiki')).lower(),
                            'license_name': safe_get(repo, 'license', 'key', default='')
                        })

                    repo_page += 1

            except Exception as e:
                print(f"⚠️ Error processing user {user.get('login', 'unknown')}: {str(e)}")

        # Proceed to the next page for users
        page += 1

    if not users_data:
        print("❌ No user data collected.")
        return None, None

    if not repos_data:
        print("❌ No repository data collected.")
        return None, None

    print(f"\n💾 Creating CSV files for {len(users_data)} users and {len(repos_data)} repositories...")

    # Create DataFrames and save to CSV
    users_df = pd.DataFrame(users_data)
    repos_df = pd.DataFrame(repos_data)
    users_df.to_csv('/content/users.csv', index=False)
    repos_df.to_csv('/content/repositories.csv', index=False)

    return users_df, repos_df

def create_readme(users_df, repos_df):
    """Generate README.md with analysis"""
    print("📝 Generating README.md...")

    try:
        total_users = len(users_df)
        total_repos = len(repos_df)
        avg_followers = users_df['followers'].mean()
        top_languages = repos_df['language'].value_counts().head()
        companies = users_df['company'].value_counts().head()

        total_repos_with_language = len(repos_df[repos_df['language'].notna()])
        python_repos = len(repos_df[repos_df['language'] == 'Python'])
        js_repos = len(repos_df[repos_df['language'] == 'JavaScript'])

        python_percent = (python_repos / total_repos_with_language * 100) if total_repos_with_language > 0 else 0
        js_percent = (js_repos / total_repos_with_language * 100) if total_repos_with_language > 0 else 0
        wiki_percent = (len(repos_df[repos_df['has_wiki'] == 'false']) / len(repos_df) * 100) if len(repos_df) > 0 else 0
        license_percent = (len(repos_df[repos_df['license_name'] == '']) / len(repos_df) * 100) if len(repos_df) > 0 else 0

        readme_content = f"""# GitHub Users Analysis - Hyderabad

* Data was collected using GitHub's REST API v3, focusing on Hyderabad-based developers with 50+ followers, analyzing their profiles and recent repositories with rate limiting and error handling.

* Analysis reveals that Python is the dominant language choice among Hyderabad's top developers, with {python_percent:.1f}% of repositories using it, followed by JavaScript at {js_percent:.1f}% .

* Developers should focus on better documentation and licensing - {wiki_percent:.1f}% of repositories lack wikis and {license_percent:.1f}% lack explicit licenses, which can limit project adoption and collaboration.

## Analysis Details

### User Statistics
- Total Users Analyzed: {total_users}
- Total Repositories: {total_repos}
- Average Followers per User: {avg_followers:.1f}

### Top Programming Languages

### Most Common Companies
"""

        with open('/content/README.md', 'w') as f:
            f.write(readme_content)

    except Exception as e:
        print(f"⚠️ Error generating README: {str(e)}")
        with open('/content/README.md', 'w') as f:
            f.write("# GitHub Users Analysis\n\nError occurred during analysis.")

def main():
    print("🚀 Starting GitHub Analysis...")
    users_df, repos_df = get_github_data()
    if users_df is not None and repos_df is not None:
        create_readme(users_df, repos_df)
        print("\n✅ Analysis complete! Check users.csv, repositories.csv, and README.md in /content directory.")
    else:
        print("\n❌ Analysis failed!")

if __name__ == "__main__":
    main()


🚀 Starting GitHub Analysis...
Please enter your GitHub token: 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
🔍 Searching for users in Hyderabad...
📊 Processing page 1 with 100 users found...

👤 Fetching data for user: iam-veeramalla
No more repositories for iam-veeramalla.

👤 Fetching data for user: in28minutes
No more repositories for in28minutes.

👤 Fetching data for user: stacksimplify
No more repositories for stacksimplify.

👤 Fetching data for user: thenaveensaggam
No more repositories for thenaveensaggam.

👤 Fetching data for user: MadhavBahl
No more repositories for MadhavBahl.

👤 Fetching data for user: sivaprasadreddy
No more repositories for sivaprasadreddy.

👤 Fetching data for user: ashokitschool
No more repositories for ashokitschool.

👤 Fetching data for user: Shahzaib-D-Memon
No more repositories for Shahzaib-D-Memon.

👤 Fetching data for user: codewithdev
No more repositories for codewithdev.

👤 Fetching data for user: NotHarshhaa
No more repositories for NotHarshhaa.

👤 Fetc

In [None]:
#more cleaned users.csv
import pandas as pd

# Load the data
users_df = pd.read_csv('users.csv')

# Clean the company names
users_df['company'] = users_df['company'].str.strip()  # Trim whitespace
users_df['company'] = users_df['company'].str.lstrip('@')  # Strip leading '@'
users_df['company'] = users_df['company'].str.upper()  # Convert to uppercase

# Save the cleaned DataFrame back to users.csv
users_df.to_csv('users.csv', index=False)

print("Company names cleaned and saved to users.csv.")


Company names cleaned and saved to users.csv.


In [None]:
#repositories.csv
import requests
import csv

# Replace with your personal access token
GITHUB_TOKEN = 'GITHUB_TOKEN'
HEADERS = {'Authorization': f'token {GITHUB_TOKEN}'}
BASE_URL = 'https://api.github.com'

def read_users_from_csv(file_path):
    users = []
    with open(file_path, 'r', encoding='utf-8') as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            users.append(row['login'])
    return users

def get_repositories(username):
    repos = []
    page = 1
    while True:
        response = requests.get(f'{BASE_URL}/users/{username}/repos',
                                headers=HEADERS,
                                params={'sort': 'pushed', 'direction': 'desc', 'per_page': 100, 'page': page})
        data = response.json()

        if not data or len(repos) >= 500:
            break

        for repo in data:
            repos.append({
                'full_name': repo['full_name'],
                'created_at': repo['created_at'],
                'stargazers_count': repo['stargazers_count'],
                'watchers_count': repo['watchers_count'],
                'language': repo['language'],
                'has_projects': repo['has_projects'],
                'has_wiki': repo['has_wiki'],
                'license_name': repo['license']['key'] if repo.get('license') else None  # Safely fetch license key
            })

        page += 1

    return repos[:500]  # Return up to 500 repos

def main():
    users = read_users_from_csv('users.csv')
    all_repos = []

    for user in users:
        repos = get_repositories(user)
        for repo in repos:
            all_repos.append({
                'login': user,
                **repo
            })

    # Write to CSV
    with open('repositories.csv', 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['login', 'full_name', 'created_at',
                      'stargazers_count', 'watchers_count',
                      'language', 'has_projects',
                      'has_wiki', 'license_name']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for repo in all_repos:
            writer.writerow(repo)

if __name__ == '__main__':
    main()


TypeError: string indices must be integers

In [None]:
import pandas as pd
import statsmodels.api as sm

# Load the users data from the CSV file
users_df = pd.read_csv('users.csv')

# Filter out users without bios
users_with_bios = users_df[users_df['bio'].notna()]

# Calculate the length of the bio in words
#users_with_bios['bio_word_count'] = users_with_bios['bio'].str.split(" ").str.len()

# The error was here: users_with_bio was used instead of users_with_bios
users_with_bios['bio_word_count'] = users_with_bios['bio'].apply(lambda x: len(x.split()))


# Prepare the data for regression
X = users_with_bios['bio_word_count']  # Independent variable
y = users_with_bios['followers']        # Dependent variable

# Add a constant to the independent variable for the regression
X = sm.add_constant(X)

# Fit the regression model
model = sm.OLS(y, X).fit()

# Get the regression slope (coefficient for bio_word_count)
slope = model.params['bio_word_count']

# Print the slope rounded to three decimal places
print(f'Regression slope of followers on bio word count: {slope:.3f}')

Regression slope of followers on bio word count: 8.393


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  users_with_bios['bio_word_count'] = users_with_bios['bio'].apply(lambda x: len(x.split()))


In [None]:
#Q1
import pandas as pd

# Load the data
users_df = pd.read_csv('users.csv')

# Sort by followers and get top 5
top_users = users_df.sort_values(by='followers', ascending=False).head(5)

# Extract logins
top_logins = top_users['login'].tolist()
result = ', '.join(top_logins)

print(result)


iam-veeramalla, in28minutes, stacksimplify, thenaveensaggam, MadhavBahl


In [None]:
#Q2
import pandas as pd

# Load the data
users_df = pd.read_csv('users.csv')

# Convert created_at to datetime
users_df['created_at'] = pd.to_datetime(users_df['created_at'])

# Sort by created_at and get the earliest 5 users
earliest_users = users_df.sort_values(by='created_at').head(5)

# Extract logins
earliest_logins = earliest_users['login'].tolist()
result = ', '.join(earliest_logins)

print(result)


shabda, sitaramc, bagwanpankaj, srikanthlogic, kulbirsaini


In [None]:
#Q3
import pandas as pd

# Load the data
repositories_df = pd.read_csv('repositories.csv')

# Filter out missing license names
repositories_df = repositories_df[repositories_df['license_name'].notna()]

# Count occurrences of each license
license_counts = repositories_df['license_name'].value_counts()

# Get the top 3 licenses
top_licenses = license_counts.head(3).index.tolist()

# Join the license names in order
result = ', '.join(top_licenses)

print(result)


mit, apache-2.0, other


In [None]:
#Q4
import pandas as pd

# Load the data
users_df = pd.read_csv('users.csv')

# Count occurrences of each company
company_counts = users_df['company'].value_counts()

# Get the company with the highest count
most_common_company = company_counts.idxmax()
most_common_count = company_counts.max()

print(f"The majority of developers work at: {most_common_company} with {most_common_count} developers.")


The majority of developers work at: MICROSOFT with 17 developers.


In [None]:
#Q5
import pandas as pd

# Load the data
repositories_df = pd.read_csv('repositories.csv')

# Count occurrences of each programming language, ignoring missing values
language_counts = repositories_df['language'].value_counts()

# Get the most popular programming language
most_popular_language = language_counts.idxmax()
most_popular_count = language_counts.max()

print(f"The most popular programming language is: {most_popular_language} with {most_popular_count} repositories.")


The most popular programming language is: JavaScript with 5762 repositories.


In [None]:
#Q6
import pandas as pd

# Load the data
users_df = pd.read_csv('users.csv')
repositories_df = pd.read_csv('repositories.csv')

# Convert created_at to datetime and filter users who joined after 2020
users_df['created_at'] = pd.to_datetime(users_df['created_at'])
recent_users = users_df[users_df['created_at'] > '2020-01-01']

# Get the logins of recent users
recent_user_logins = recent_users['login'].tolist()

# Filter repositories by these users
recent_repositories = repositories_df[repositories_df['login'].isin(recent_user_logins)]

# Count occurrences of each programming language
language_counts = recent_repositories['language'].value_counts()

# Get the second most popular programming language
second_most_popular_language = language_counts.nlargest(2).index[1]
second_most_popular_count = language_counts.nlargest(2).values[1]

print(f"The second most popular programming language among users who joined after 2020 is: {second_most_popular_language} with {second_most_popular_count} repositories.")


The second most popular programming language among users who joined after 2020 is: HTML with 777 repositories.


In [None]:
#Q7
import pandas as pd

# Load the data
repositories_df = pd.read_csv('repositories.csv')

# Group by programming language and calculate the average stars
average_stars = repositories_df.groupby('language')['stargazers_count'].mean()

# Identify the language with the highest average stars
highest_average_language = average_stars.idxmax()
highest_average_value = average_stars.max()

print(f"The programming language with the highest average number of stars per repository is: {highest_average_language} with an average of {highest_average_value:.2f} stars.")


The programming language with the highest average number of stars per repository is: Perl with an average of 195.52 stars.


In [None]:
#Q8
import pandas as pd

# Load the data
users_df = pd.read_csv('users.csv')

# Calculate leader_strength
users_df['leader_strength'] = users_df['followers'] / (1 + users_df['following'])

# Sort by leader_strength and get the top 5
top_leaders = users_df.sort_values(by='leader_strength', ascending=False).head(5)

# Extract logins
top_logins = top_leaders['login'].tolist()
result = ', '.join(top_logins)

print(result)


in28minutes, iam-veeramalla, stacksimplify, ashokitschool, thenaveensaggam


In [None]:
#Q9
import pandas as pd

# Load the data
users_df = pd.read_csv('users.csv')

# Calculate the correlation between followers and public repositories
correlation = users_df['followers'].corr(users_df['public_repos'])

print(f"The correlation between the number of followers and the number of public repositories is: {correlation:.3f}")


The correlation between the number of followers and the number of public repositories is: 0.006


In [None]:
#Q10
import pandas as pd
import statsmodels.api as sm

# Load the data
users_df = pd.read_csv('users.csv')

# Define the independent variable (X) and dependent variable (Y)
X = users_df['public_repos']
Y = users_df['followers']

# Add a constant to the independent variable (for the intercept)
X = sm.add_constant(X)

# Fit the regression model
model = sm.OLS(Y, X).fit()

# Get the summary of the regression results
summary = model.summary()

# Extract the coefficient for public_repos
additional_followers_per_repo = model.params['public_repos']

print(f"Regression Results:\n{summary}")
print(f"Estimated additional followers per additional public repository: {additional_followers_per_repo:.3f}")


Regression Results:
                            OLS Regression Results                            
Dep. Variable:              followers   R-squared:                       0.000
Model:                            OLS   Adj. R-squared:                 -0.002
Method:                 Least Squares   F-statistic:                   0.01983
Date:                Wed, 30 Oct 2024   Prob (F-statistic):              0.888
Time:                        12:42:56   Log-Likelihood:                -4188.1
No. Observations:                 504   AIC:                             8380.
Df Residuals:                     502   BIC:                             8389.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                   coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------
const          201.3644     

In [None]:
#Q11
import pandas as pd

# Load the data
repositories_df = pd.read_csv('repositories.csv')


# Calculate the correlation directly
correlation = repositories_df['has_projects'].astype(int).corr(repositories_df['has_wiki'].astype(int))

print(f"The correlation between having projects enabled and having a wiki enabled is: {correlation:.3f}")


The correlation between having projects enabled and having a wiki enabled is: 0.171


In [None]:
#Q12
import pandas as pd

# Load the users data from the CSV file
users_df = pd.read_csv('users.csv')

# Filter hireable and non-hireable users
hireable_users = users_df[users_df['hireable'] == True]
non_hireable_users = users_df[users_df['hireable'].isna() | (users_df['hireable'] == False)]

# Calculate average following for both groups
average_hireable_following = hireable_users['following'].mean()
average_non_hireable_following = non_hireable_users['following'].mean()

# Calculate the difference
difference = average_hireable_following - average_non_hireable_following

# Print the result rounded to three decimal places
print(f'Difference in average following (hireable - non-hireable): {difference:.3f}')


Difference in average following (hireable - non-hireable): 33.270


In [None]:
#Q13
import pandas as pd
import statsmodels.api as sm

# Load the users data from the CSV file
users_df = pd.read_csv('users.csv')

# Filter out users without bios
users_with_bios = users_df[users_df['bio'].notna()]

# Calculate the length of the bio in words
#users_with_bios['bio_word_count'] = users_with_bios['bio'].str.split(" ").str.len()

users_with_bios['bio_word_count'] = users_with_bio['bio'].apply(lambda x: len(x.split()))


# Prepare the data for regression
X = users_with_bios['bio_word_count']  # Independent variable
y = users_with_bios['followers']        # Dependent variable

# Add a constant to the independent variable for the regression
X = sm.add_constant(X)

# Fit the regression model
model = sm.OLS(y, X).fit()

# Get the regression slope (coefficient for bio_word_count)
slope = model.params['bio_word_count']

# Print the slope rounded to three decimal places
print(f'Regression slope of followers on bio word count: {slope:.3f}')


NameError: name 'users_with_bio' is not defined

In [None]:
#Q14
import pandas as pd

# Load the repositories data from the CSV file
repos_df = pd.read_csv('repositories.csv')

# Convert the created_at column to datetime
repos_df['created_at'] = pd.to_datetime(repos_df['created_at'])

# Filter for weekend days (Saturday: 5, Sunday: 6)
weekend_repos = repos_df[repos_df['created_at'].dt.dayofweek.isin([5, 6])]

# Count the number of repositories created by each user
top_users = weekend_repos['login'].value_counts().head(5)

# Get the top 5 users' logins in order
top_users_logins = ', '.join(top_users.index)

# Print the result
print(f'Top 5 users who created the most repositories on weekends: {top_users_logins}')


Top 5 users who created the most repositories on weekends: anjijava16, hemanth22, Shekharrajak, wahidKhan74, elevenpassin


In [None]:
#Q15
import pandas as pd

# Load the users data from the CSV file
users_df = pd.read_csv('users.csv')

# Total number of users
total_users = len(users_df)

# Filter hireable and non-hireable users
hireable_users = users_df[users_df['hireable'] == True]
non_hireable_users = users_df[users_df['hireable'].isna() | (users_df['hireable'] == False)]

# Calculate the fraction of users with email in both groups
fraction_hireable_with_email = hireable_users['email'].notna().mean()
fraction_non_hireable_with_email = non_hireable_users['email'].notna().mean()

# Calculate the difference
difference = fraction_hireable_with_email - fraction_non_hireable_with_email

# Print the result rounded to three decimal places
print(f'Difference in fraction of users with email: {difference:.3f}')


Difference in fraction of users with email: 0.258


In [None]:
#Q16
import pandas as pd

# Load the users data from the CSV file
users_df = pd.read_csv('users.csv')

# Filter out users without names
valid_users = users_df[users_df['name'].notna()]

# Extract surnames (last word in name)
valid_users['surname'] = valid_users['name'].str.strip().str.split().str[-1]

# Count occurrences of each surname
surname_counts = valid_users['surname'].value_counts()

# Find the most common surname(s)
max_count = surname_counts.max()
most_common_surnames = surname_counts[surname_counts == max_count].index.tolist()

# Sort surnames alphabetically
most_common_surnames.sort()

# Count users with the most common surname
number_of_users = max_count

# Print results
most_common_surnames_str = ', '.join(most_common_surnames)
print(f'Most common surname(s): {most_common_surnames_str}')
print(f'Number of users with the most common surname: {number_of_users}')


Most common surname(s): Kumar
Number of users with the most common surname: 12


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  valid_users['surname'] = valid_users['name'].str.strip().str.split().str[-1]


Wrong Answers In Q11, 12, 15 > because In Original Files, Booleans are True/False . We have to upload Booleans as true/false in github repo. Run Scripts below and then upload resulting files to github.

In [None]:

repositories_df = pd.read_csv('repositories.csv')

# Check the data types and structure
print(repositories_df.head())

# Replace True/False with true/false
repositories_df['has_projects'] = repositories_df['has_projects'].replace({True: 'true', False: 'false'})
repositories_df['has_wiki'] = repositories_df['has_wiki'].replace({True: 'true', False: 'false'})

# Save the modified DataFrame back to the same CSV file
repositories_df.to_csv('repositories.csv', index=False)

# Check the data types and structure
print(repositories_df.head())

print("Updated CSV file saved successfully.")


         login                           full_name            created_at  \
0  krishnaik06  krishnaik06/Polars-GPU-Engine-Demo  2024-10-28T08:42:45Z   
1  krishnaik06  krishnaik06/Transformers-Materials  2024-10-19T15:43:30Z   
2  krishnaik06              krishnaik06/ETLWeather  2024-10-15T10:45:09Z   
3  krishnaik06                krishnaik06/datasets  2024-10-08T05:16:12Z   
4  krishnaik06             krishnaik06/testdagshub  2024-10-01T16:57:42Z   

   stargazers_count  watchers_count          language  has_projects  has_wiki  \
0                 1               1  Jupyter Notebook          True      True   
1                10              10               NaN          True      True   
2                 9               9            Python          True      True   
3                 3               3               NaN          True      True   
4                 0               0               NaN          True      True   

  license_name  
0      gpl-3.0  
1      gpl-3.0  
2    

In [None]:
users_df = pd.read_csv('users.csv')

# Check the data types and structure
print(users_df.head())

# Replace True/False with true/false in the hireable column
users_df['hireable'] = users_df['hireable'].replace({True: 'true', False: 'false'})

# Save the modified DataFrame back to the same CSV file
users_df.to_csv('users.csv', index=False)

# Check the data types and structure
print(users_df.head())


print("Updated CSV file saved successfully.")



             login           name          company          location  \
0      krishnaik06   Krish C Naik    PANASONIC IIC         Bangalore   
1  championswimmer    Arnav Gupta        JIOCINEMA  Bangalore, India   
2    arpitbbhayani  Arpit Bhayani           DICEDB         Bangalore   
3    manjunath5496    Manjunath.R  MYW3SCHOOLS.COM  Bangalore, India   
4      tanaypratap   Tanay Pratap           INVACT  Bangalore, India   

                     email hireable  \
0    krishnaik06@gmail.com      NaN   
1   dev@championswimmer.in     True   
2                      NaN     True   
3  manjunath5496@gmail.com      NaN   
4      tanay.mit@gmail.com      NaN   

                                                 bio  public_repos  followers  \
0  Data Scientist with ML and Deep  Learning expe...           330      30906   
1  Director of Engineering - @JioCinema ┃\r\nPast...           351       4661   
2  Creator of @DiceDB • ex-Google Dataproc, ex-Am...           188       4557   
3  "Scie