Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New lint: [duplicate_map_keys] #12575

Draft
wants to merge 8 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Expand Up @@ -5209,6 +5209,7 @@ Released 2018-09-13
[`drop_copy`]: https://rust-lang.github.io/rust-clippy/master/index.html#drop_copy
[`drop_non_drop`]: https://rust-lang.github.io/rust-clippy/master/index.html#drop_non_drop
[`drop_ref`]: https://rust-lang.github.io/rust-clippy/master/index.html#drop_ref
[`duplicate_map_keys`]: https://rust-lang.github.io/rust-clippy/master/index.html#duplicate_map_keys
[`duplicate_mod`]: https://rust-lang.github.io/rust-clippy/master/index.html#duplicate_mod
[`duplicate_underscore_argument`]: https://rust-lang.github.io/rust-clippy/master/index.html#duplicate_underscore_argument
[`duplicated_attributes`]: https://rust-lang.github.io/rust-clippy/master/index.html#duplicated_attributes
Expand Down
1 change: 1 addition & 0 deletions clippy_lints/src/declared_lints.rs
Expand Up @@ -154,6 +154,7 @@ pub(crate) static LINTS: &[&crate::LintInfo] = &[
crate::drop_forget_ref::DROP_NON_DROP_INFO,
crate::drop_forget_ref::FORGET_NON_DROP_INFO,
crate::drop_forget_ref::MEM_FORGET_INFO,
crate::duplicate_map_keys::DUPLICATE_MAP_KEYS_INFO,
crate::duplicate_mod::DUPLICATE_MOD_INFO,
crate::else_if_without_else::ELSE_IF_WITHOUT_ELSE_INFO,
crate::empty_drop::EMPTY_DROP_INFO,
Expand Down
90 changes: 90 additions & 0 deletions clippy_lints/src/duplicate_map_keys.rs
@@ -0,0 +1,90 @@
use clippy_utils::diagnostics::span_lint_and_note;
use clippy_utils::ty::is_type_diagnostic_item;
use clippy_utils::{last_path_segment, SpanlessEq};
use rustc_hir::ExprKind;
use rustc_lint::{LateContext, LateLintPass};
use rustc_session::declare_lint_pass;
use rustc_span::symbol::sym;

declare_clippy_lint! {
/// ### What it does
/// When two items are inserted into a `HashMap` with the same key,
/// the second item will overwrite the first item.
///
/// ### Why is this bad?
/// This can lead to data loss.
///
/// ### Example
/// ```no_run
/// # use std::collections::HashMap;
/// let example = HashMap::from([(5, 1), (5, 2)]);
/// ```
#[clippy::version = "1.79.0"]
pub DUPLICATE_MAP_KEYS,
suspicious,
"`HashMap` with duplicate keys loses data"
}

declare_lint_pass!(DuplicateMapKeys => [DUPLICATE_MAP_KEYS]);

impl<'tcx> LateLintPass<'tcx> for DuplicateMapKeys {
fn check_expr(&mut self, cx: &LateContext<'tcx>, expr: &'tcx rustc_hir::Expr<'tcx>) {
if has_hash_collision(cx, expr).is_some_and(|v| !v.is_empty()) {
span_lint_and_note(
cx,
DUPLICATE_MAP_KEYS,
expr.span,
"this `HashMap` uses one key for multiple items and will lose data",
None,
"consider using a different keys for all items",
);
}
}
}

// TODO: Also check for different sources of hash collisions
// TODO: Maybe other types of hash maps should be checked as well?
fn has_hash_collision<'a>(
cx: &LateContext<'_>,
expr: &'a rustc_hir::Expr<'_>,
) -> Option<Vec<(rustc_hir::Expr<'a>, rustc_hir::Expr<'a>)>> {
// If the expression is a call to `HashMap::from`, check if the keys are the same
if let ExprKind::Call(func, args) = &expr.kind
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if let ExprKind::Call(func, args) = &expr.kind
if let ExprKind::Call(func, [arg]) = &expr.kind

Matching a 1-element slice would let you avoid the args.len() == 1 check and index later.

// First check for HashMap::from
&& let ty = cx.typeck_results().expr_ty(expr)
&& is_type_diagnostic_item(cx, ty, sym::HashMap)
&& let ExprKind::Path(func_path) = func.kind
&& last_path_segment(&func_path).ident.name == sym::from
// There should be exactly one argument to HashMap::from
&& args.len() == 1
// which should be an array
&& let ExprKind::Array(args) = &args[0].kind
// Then check the keys
{
// Put all keys in a vector
let mut keys = Vec::new();

for arg in *args {
//
if let ExprKind::Tup(args) = &arg.kind
&& !args.is_empty()
// && let ExprKind::Lit(lit) = args[0].kind
{
keys.push(args[0]);
}
}
Comment on lines +65 to +75
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
let mut keys = Vec::new();
for arg in *args {
//
if let ExprKind::Tup(args) = &arg.kind
&& !args.is_empty()
// && let ExprKind::Lit(lit) = args[0].kind
{
keys.push(args[0]);
}
}
let keys = args.iter().filter_map(|arg| {
if let ExprKind::Tup([key, ..]) = arg.kind {
Some(key)
} else {
None
}
}).collect::<Vec<_>>();


// Check if there are any duplicate keys
let mut duplicate = Vec::new();
let mut spannless_eq = SpanlessEq::new(cx);
for i in 0..keys.len() {
for j in i + 1..keys.len() {
Comment on lines +80 to +81
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a quadratic algorithm that will slow down considerably with larger sets (try a thousand elements). A better way would be to use a SpanlessHash per item and add them to a HashSet, which is linear.

if spannless_eq.eq_expr(&keys[i], &keys[j]) {
duplicate.push((keys[i], keys[j]));
}
}
}
return Some(duplicate);
}
None
}
2 changes: 2 additions & 0 deletions clippy_lints/src/lib.rs
Expand Up @@ -121,6 +121,7 @@ mod disallowed_types;
mod doc;
mod double_parens;
mod drop_forget_ref;
mod duplicate_map_keys;
mod duplicate_mod;
mod else_if_without_else;
mod empty_drop;
Expand Down Expand Up @@ -1129,6 +1130,7 @@ pub fn register_lints(store: &mut rustc_lint::LintStore, conf: &'static Conf) {
store.register_late_pass(|_| Box::new(zero_repeat_side_effects::ZeroRepeatSideEffects));
store.register_late_pass(|_| Box::new(manual_unwrap_or_default::ManualUnwrapOrDefault));
store.register_late_pass(|_| Box::new(integer_division_remainder_used::IntegerDivisionRemainderUsed));
store.register_late_pass(|_| Box::new(duplicate_map_keys::DuplicateMapKeys));
// add lints here, do not remove this comment, it's used in `new_lint`
}

Expand Down
38 changes: 38 additions & 0 deletions tests/ui/duplicate_map_keys.rs
@@ -0,0 +1,38 @@
#![warn(clippy::duplicate_map_keys)]
#![allow(clippy::diverging_sub_expression)]
use std::collections::HashMap;
use std::hash::{Hash, Hasher};

fn main() -> Result<(), ()> {
let _ = HashMap::from([(5, 1), (5, 2)]); // expect lint
let _ = HashMap::from([((), 1), ((), 2)]); // expect lint
let _ = HashMap::from([("a", 1), ("a", 2)]); // expect lint

let _ = HashMap::from([(return Ok(()), 1), (return Err(()), 2)]); // expect no lint
// is this what you meant? I'm not sure how to put return into there

let _ = HashMap::from([(5, 1), (6, 2)]); // expect no lint
let _ = HashMap::from([("a", 1), ("b", 2)]); // expect no lint

let _ = HashMap::from([(Bad(true), 1), (Bad(false), 2)]); // expect no lint (false negative)

Ok(())
}

// Construction of false negative
struct Bad(bool);

// eq is used in the lint, but hash in the HashMap
impl Eq for Bad {}

impl PartialEq for Bad {
fn eq(&self, other: &Self) -> bool {
true
}
}

impl Hash for Bad {
fn hash<H: Hasher>(&self, state: &mut H) {
// empty
}
}
28 changes: 28 additions & 0 deletions tests/ui/duplicate_map_keys.stderr
@@ -0,0 +1,28 @@
error: this `HashMap` uses one key for multiple items and will lose data
--> tests/ui/duplicate_map_keys.rs:7:13
|
LL | let _ = HashMap::from([(5, 1), (5, 2)]); // expect lint
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
= note: consider using a different keys for all items
= note: `-D clippy::duplicate-map-keys` implied by `-D warnings`
= help: to override `-D warnings` add `#[allow(clippy::duplicate_map_keys)]`

error: this `HashMap` uses one key for multiple items and will lose data
--> tests/ui/duplicate_map_keys.rs:8:13
|
LL | let _ = HashMap::from([((), 1), ((), 2)]); // expect lint
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
= note: consider using a different keys for all items

error: this `HashMap` uses one key for multiple items and will lose data
--> tests/ui/duplicate_map_keys.rs:9:13
|
LL | let _ = HashMap::from([("a", 1), ("a", 2)]); // expect lint
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
= note: consider using a different keys for all items

error: aborting due to 3 previous errors